aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch/ifunc-memcmp.h
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-03-05 07:20:28 -0800
committerH.J. Lu <hjl.tools@gmail.com>2021-03-29 07:40:17 -0700
commit91264fe3577fe887b4860923fa6142b5274c8965 (patch)
tree172c0193a4e0a6b0e695fd8e0df51f0d746dd817 /sysdeps/x86_64/multiarch/ifunc-memcmp.h
parent1b968b6b9b3aac702ac2f133e0dd16cfdbb415ee (diff)
downloadglibc-91264fe3577fe887b4860923fa6142b5274c8965.zip
glibc-91264fe3577fe887b4860923fa6142b5274c8965.tar.gz
glibc-91264fe3577fe887b4860923fa6142b5274c8965.tar.bz2
x86-64: Add memcmp family functions with 256-bit EVEX
Update ifunc-memcmp.h to select the function optimized with 256-bit EVEX instructions using YMM16-YMM31 registers to avoid RTM abort with usable AVX512VL, AVX512BW and MOVBE since VZEROUPPER isn't needed at function exit.
Diffstat (limited to 'sysdeps/x86_64/multiarch/ifunc-memcmp.h')
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-memcmp.h13
1 files changed, 10 insertions, 3 deletions
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
index d5df541..5ac41a1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -23,17 +23,24 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
- return OPTIMIZE (avx2_movbe);
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ return OPTIMIZE (evex_movbe);
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2_movbe);
+ }
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
return OPTIMIZE (sse4_1);