aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2016-03-13 00:26:57 -0800
committerH.J. Lu <hjl.tools@gmail.com>2016-03-31 09:00:47 -0700
commit0db56470f1bee39a252daf2728d818296b179a9e (patch)
tree5d376065cc6fce95724fefee7bd867abd7f05b0b
parent7df7c6a195d6bc6ffdd90db0786d5de9c67d037a (diff)
downloadglibc-hjl/erms/master.zip
glibc-hjl/erms/master.tar.gz
glibc-hjl/erms/master.tar.bz2
Add memmove/memset-avx512-unaligned-erms-no-vzeroupper.Shjl/erms/master
-rw-r--r--sysdeps/x86_64/multiarch/Makefile4
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c48
-rw-r--r--sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S12
-rw-r--r--sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S19
4 files changed, 82 insertions, 1 deletions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 8878efb..0218ffa 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -24,9 +24,11 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
memmove-sse2-unaligned-erms \
memmove-avx-unaligned-erms \
memmove-avx512-unaligned-erms \
+ memmove-avx512-unaligned-erms-no-vzeroupper \
memset-sse2-unaligned-erms \
memset-avx2-unaligned-erms \
- memset-avx512-unaligned-erms
+ memset-avx512-unaligned-erms \
+ memset-avx512-unaligned-erms-no-vzeroupper
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 1e880f6..a621ae0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -57,7 +57,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_chk_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, __memmove_chk,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -101,7 +107,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, memmove,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memmove_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
__memmove_ssse3_back)
@@ -137,9 +149,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memset_chk_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, __memset_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_chk_avx512_no_vzeroupper_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_chk_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, __memset_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_chk_avx512_no_vzeroupper_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_chk_avx512_no_vzeroupper)
#endif
)
@@ -167,9 +185,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memset_avx512_unaligned_erms)
IFUNC_IMPL_ADD (array, i, memset,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_avx512_no_vzeroupper_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_avx512_unaligned)
IFUNC_IMPL_ADD (array, i, memset,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memset_avx512_no_vzeroupper_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memset_avx512_no_vzeroupper)
#endif
)
@@ -338,7 +362,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memcpy_chk_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, __memcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -386,7 +416,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memcpy_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, memcpy,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__memcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, memcpy, 1,
@@ -407,7 +443,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_chk_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_ARCH_FEATURE (AVX_Usable),
@@ -442,7 +484,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__mempcpy_avx512_unaligned_2)
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_no_vzeroupper_unaligned_2)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_no_vzeroupper_unaligned_erms)
#endif
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX_Usable),
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S
new file mode 100644
index 0000000..3ba2851
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms-no-vzeroupper.S
@@ -0,0 +1,12 @@
+#ifdef HAVE_AVX512_ASM_SUPPORT
+# define VEC_SIZE 64
+# define VEC(i) zmm##i
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+# define VZEROUPPER
+
+# define SECTION(p) p##.avx512
+# define MEMMOVE_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S
new file mode 100644
index 0000000..db24630
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms-no-vzeroupper.S
@@ -0,0 +1,19 @@
+#ifdef HAVE_AVX512_ASM_SUPPORT
+# define VEC_SIZE 64
+# define VEC(i) zmm##i
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+# define VZEROUPPER
+# define VZEROUPPER_SHORT_RETURN rep
+
+# define VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ vmovd d, %xmm0; \
+ movq r, %rax; \
+ vpbroadcastb %xmm0, %xmm0; \
+ vpbroadcastq %xmm0, %zmm0
+
+# define SECTION(p) p##.avx512
+# define MEMSET_SYMBOL(p,s) p##_avx512_no_vzeroupper_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif