about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- sysdeps/x86_64/memset.S 45
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-impl-list.c 249
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-memset.h 45
-rw-r--r-- sysdeps/x86_64/multiarch/ifunc-wmemset.h 21
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S 5
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S 4
-rw-r--r-- sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S 12
-rw-r--r-- sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S 12
-rw-r--r-- sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S 57
-rw-r--r-- sysdeps/x86_64/multiarch/rtld-memset.S 18
10 files changed, 265 insertions, 203 deletions
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index a6eea61..f4e1bab 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -18,47 +18,18 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
-#define USE_WITH_SSE2 1
-#define VEC_SIZE 16
-#define MOV_SIZE 3
-#define RET_SIZE 1
+#define MEMSET_SYMBOL(p,s) memset
+#define MEMSET_CHK_SYMBOL(p,s) p
-#define VEC(i) xmm##i
-#define VMOVU movups
-#define VMOVA movaps
+#define WMEMSET_SYMBOL(p,s) __wmemset
+#define WMEMSET_CHK_SYMBOL(p,s) p
-# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
- movd d, %xmm0; \
- movq r, %rax; \
- punpcklbw %xmm0, %xmm0; \
- punpcklwd %xmm0, %xmm0; \
- pshufd $0, %xmm0, %xmm0
+#define DEFAULT_IMPL_V1 "multiarch/memset-sse2-unaligned-erms.S"
+#define DEFAULT_IMPL_V3 "multiarch/memset-avx2-unaligned-erms.S"
+#define DEFAULT_IMPL_V4 "multiarch/memset-evex-unaligned-erms.S"
-# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
- movd d, %xmm0; \
- pshufd $0, %xmm0, %xmm0; \
- movq r, %rax
-
-# define MEMSET_VDUP_TO_VEC0_HIGH()
-# define MEMSET_VDUP_TO_VEC0_LOW()
-
-# define WMEMSET_VDUP_TO_VEC0_HIGH()
-# define WMEMSET_VDUP_TO_VEC0_LOW()
-
-#define SECTION(p) p
-
-#ifndef MEMSET_SYMBOL
-# define MEMSET_CHK_SYMBOL(p,s) p
-# define MEMSET_SYMBOL(p,s) memset
-#endif
-
-#ifndef WMEMSET_SYMBOL
-# define WMEMSET_CHK_SYMBOL(p,s) p
-# define WMEMSET_SYMBOL(p,s) __wmemset
-#endif
-
-#include "multiarch/memset-vec-unaligned-erms.S"
+#include "isa-default-impl.h"
libc_hidden_builtin_def (memset)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 7858aa3..21008c7 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -213,94 +213,99 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, __memset_chk,
IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
__memset_chk_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
- __memset_chk_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __memset_chk_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __memset_chk_avx2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_chk_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_chk_avx2_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_chk_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, __memset_chk,
- CPU_FEATURE_USABLE (AVX512F),
- __memset_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memset_chk_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_chk_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_chk_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_chk_avx2_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_chk_avx2_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __memset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_chk_avx2_unaligned_erms_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __memset_chk, 1,
+ __memset_chk_sse2_unaligned_erms)
)
#endif
/* Support sysdeps/x86_64/multiarch/memset.c. */
IFUNC_IMPL (i, name, memset,
IFUNC_IMPL_ADD (array, i, memset, 1,
- __memset_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, memset, 1,
- __memset_sse2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX2),
- __memset_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX2),
- __memset_avx2_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __memset_avx2_unaligned_erms_rtm)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_evex_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_avx512_unaligned_erms)
- IFUNC_IMPL_ADD (array, i, memset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __memset_avx512_unaligned)
- IFUNC_IMPL_ADD (array, i, memset,
- CPU_FEATURE_USABLE (AVX512F),
- __memset_avx512_no_vzeroupper)
+ __memset_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_avx512_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memset_avx512_no_vzeroupper)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __memset_evex_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_avx2_unaligned_erms)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_avx2_unaligned_rtm)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, memset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __memset_avx2_unaligned_erms_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+ __memset_sse2_unaligned)
+ X86_IFUNC_IMPL_ADD_V2 (array, i, memset, 1,
+ __memset_sse2_unaligned_erms)
)
/* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
@@ -821,25 +826,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wmemset.c. */
IFUNC_IMPL (i, name, wmemset,
- IFUNC_IMPL_ADD (array, i, wmemset, 1,
- __wmemset_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- CPU_FEATURE_USABLE (AVX2),
- __wmemset_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemset_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, wmemset,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_avx512_unaligned))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, wmemset,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemset_avx2_unaligned_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, wmemset, 1,
+ __wmemset_sse2_unaligned))
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/memcpy_chk.c. */
@@ -1049,25 +1056,27 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/x86_64/multiarch/wmemset_chk.c. */
IFUNC_IMPL (i, name, __wmemset_chk,
- IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
- __wmemset_chk_sse2_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- CPU_FEATURE_USABLE (AVX2),
- __wmemset_chk_avx2_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX2)
- && CPU_FEATURE_USABLE (RTM)),
- __wmemset_chk_avx2_unaligned_rtm)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_chk_evex_unaligned)
- IFUNC_IMPL_ADD (array, i, __wmemset_chk,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (BMI2)),
- __wmemset_chk_avx512_unaligned))
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_chk_evex_unaligned)
+ X86_IFUNC_IMPL_ADD_V4 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wmemset_chk_avx512_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_chk_avx2_unaligned)
+ X86_IFUNC_IMPL_ADD_V3 (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemset_chk_avx2_unaligned_rtm)
+ /* ISA V2 wrapper for SSE2 implementation because the SSE2
+ implementation is also used at ISA level 2. */
+ X86_IFUNC_IMPL_ADD_V2 (array, i, __wmemset_chk, 1,
+ __wmemset_chk_sse2_unaligned))
#endif
return 0;
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 64d1799..ed51497 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -20,10 +20,19 @@
#include <init-arch.h>
extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
attribute_hidden;
@@ -31,31 +40,26 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
- attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
return OPTIMIZE (erms);
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx512_unaligned_erms);
@@ -66,11 +70,11 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx512_no_vzeroupper);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (evex_unaligned_erms);
@@ -86,7 +90,8 @@ IFUNC_SELECTOR (void)
return OPTIMIZE (avx2_unaligned_rtm);
}
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
{
if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
return OPTIMIZE (avx2_unaligned_erms);
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index 87c48e2..3810c71 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -18,22 +18,26 @@
#include <init-arch.h>
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
+
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
- const struct cpu_features* cpu_features = __get_cpu_features ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ AVX_Fast_Unaligned_Load, !))
{
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
+ if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
return OPTIMIZE (avx512_unaligned);
@@ -44,7 +48,8 @@ IFUNC_SELECTOR (void)
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2_unaligned_rtm);
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
+ Prefer_No_VZEROUPPER, !))
return OPTIMIZE (avx2_unaligned);
}
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index c0bf287..a9054a9 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (3)
+
# define USE_WITH_AVX2 1
# define VEC_SIZE 32
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index c5be8f5..8cc9c16 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -17,8 +17,10 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
-#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMSET
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
index 5241216..47623b8 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define USE_WITH_AVX512 1
# define VEC_SIZE 64
@@ -30,8 +33,15 @@
# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex512
+
+#ifndef MEMSET_SYMBOL
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
+#endif
+#ifndef WMEMSET_SYMBOL
# define WMEMSET_SYMBOL(p,s) p##_avx512_##s
+#endif
+
+
# define USE_LESS_VEC_MASK_STORE 1
# include "memset-vec-unaligned-erms.S"
#endif
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
index 6370021..ac4b2d2 100644
--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -1,4 +1,7 @@
-#if IS_IN (libc)
+#include <isa-level.h>
+
+#if ISA_SHOULD_BUILD (4)
+
# define USE_WITH_EVEX 1
# define VEC_SIZE 32
@@ -30,8 +33,15 @@
# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex
+
+#ifndef MEMSET_SYMBOL
# define MEMSET_SYMBOL(p,s) p##_evex_##s
+#endif
+#ifndef WMEMSET_SYMBOL
# define WMEMSET_SYMBOL(p,s) p##_evex_##s
+#endif
+
+
# define USE_LESS_VEC_MASK_STORE 1
# include "memset-vec-unaligned-erms.S"
#endif
diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index 3d92f69..44f9b88 100644
--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -17,22 +17,51 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <shlib-compat.h>
+#include <isa-level.h>
-#if IS_IN (libc)
-# define MEMSET_SYMBOL(p,s) p##_sse2_##s
-# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
+/* Built when MINIMUM_X86_ISA_LEVEL <= 2: there is no dedicated V2
+ implementation, so ISA V2 builds must use this SSE2 version. */
+#if ISA_SHOULD_BUILD (2)
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name)
+# include <sysdep.h>
+# define USE_WITH_SSE2 1
+
+# define VEC_SIZE 16
+# define MOV_SIZE 3
+# define RET_SIZE 1
+
+# define VEC(i) xmm##i
+# define VMOVU movups
+# define VMOVA movaps
+
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ movq r, %rax; \
+ punpcklbw %xmm0, %xmm0; \
+ punpcklwd %xmm0, %xmm0; \
+ pshufd $0, %xmm0, %xmm0
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ movd d, %xmm0; \
+ pshufd $0, %xmm0, %xmm0; \
+ movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
+
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
+
+# define SECTION(p) p
+
+# ifndef MEMSET_SYMBOL
+# define MEMSET_SYMBOL(p,s) p##_sse2_##s
# endif
-# undef weak_alias
-# define weak_alias(original, alias)
-# undef strong_alias
-# define strong_alias(ignored1, ignored2)
-#endif
+# ifndef WMEMSET_SYMBOL
+# define WMEMSET_SYMBOL(p,s) p##_sse2_##s
+# endif
+
+# include "memset-vec-unaligned-erms.S"
-#include <sysdeps/x86_64/memset.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/rtld-memset.S b/sysdeps/x86_64/multiarch/rtld-memset.S
new file mode 100644
index 0000000..d912bfa
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rtld-memset.S
@@ -0,0 +1,18 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "../memset.S"