aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2023-09-20 15:44:50 -0500
committerNoah Goldstein <goldstein.w.n@gmail.com>2023-09-29 14:18:42 -0500
commitd90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff (patch)
tree0015cc47ecb9affe00f1d8afbebf4a5c16d20e25
parent5f913506f4bf4785f9cf2c2ac8d17dc9f877ff17 (diff)
downloadglibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.zip
glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.gz
glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.bz2
x86: Add support for AVX10 preset and vec size in cpu-features
This commit adds support for the new AVX10 cpu features: https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf We add checks for: - `AVX10`: Check if AVX10 is present. - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support. `make check` passes and cpuid output was checked against GNR/DMR on an emulator.
-rw-r--r--manual/platform.texi12
-rw-r--r--sysdeps/x86/bits/platform/x86.h14
-rw-r--r--sysdeps/x86/cpu-features.c25
-rw-r--r--sysdeps/x86/include/cpu-features.h27
-rw-r--r--sysdeps/x86/tst-get-cpu-features.c8
5 files changed, 83 insertions, 3 deletions
diff --git a/manual/platform.texi b/manual/platform.texi
index 2a2d557..478b6fd 100644
--- a/manual/platform.texi
+++ b/manual/platform.texi
@@ -223,6 +223,18 @@ Leaf (EAX = 23H).
@code{AVX} -- The AVX instruction extensions.
@item
+@code{AVX10} -- The AVX10 instruction extensions.
+
+@item
+@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
+
+@item
+@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
+
+@item
+@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
+
+@item
@code{AVX2} -- The AVX2 instruction extensions.
@item
diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h
index 88ca071..1e23d53 100644
--- a/sysdeps/x86/bits/platform/x86.h
+++ b/sysdeps/x86/bits/platform/x86.h
@@ -30,7 +30,8 @@ enum
CPUID_INDEX_80000008,
CPUID_INDEX_7_ECX_1,
CPUID_INDEX_19,
- CPUID_INDEX_14_ECX_0
+ CPUID_INDEX_14_ECX_0,
+ CPUID_INDEX_24_ECX_0
};
struct cpuid_feature
@@ -312,6 +313,7 @@ enum
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
+ x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
x86_cpu_index_19_ebx
@@ -325,5 +327,13 @@ enum
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
- x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4
+ x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
+
+ x86_cpu_index_24_ecx_0_ebx
+ = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
+
+ x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
+ x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
+ x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
};
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index badf088..0bf923d 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif
+ enum
+ {
+ os_xmm = 1,
+ os_ymm = 2,
+ os_zmm = 4
+ } os_vector_size = os_xmm;
/* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
{
unsigned int xcrlow;
unsigned int xcrhigh;
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
/* Is YMM and XMM state usable? */
if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
/* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
{
+ os_vector_size |= os_ymm;
CPU_FEATURE_SET (cpu_features, AVX);
/* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */
@@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
| bit_ZMM16_31_state))
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
{
+ os_vector_size |= os_zmm;
/* Determine if AVX512F is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
{
@@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
}
}
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+ && cpu_features->basic.max_cpuid >= 0x24)
+ {
+ __cpuid_count (
+ 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+ if (os_vector_size & os_xmm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
+ if (os_vector_size & os_ymm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
+ if (os_vector_size & os_zmm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
+ }
+
/* Are XTILECFG and XTILEDATA states usable? */
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
== (bit_XTILECFG_state | bit_XTILEDATA_state))
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index eb30d34..2d7427a 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -29,7 +29,7 @@
enum
{
- CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+ CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
};
enum
@@ -319,6 +319,7 @@ enum
#define bit_cpu_AVX_NE_CONVERT (1u << 5)
#define bit_cpu_AMX_COMPLEX (1u << 8)
#define bit_cpu_PREFETCHI (1u << 14)
+#define bit_cpu_AVX10 (1u << 19)
#define bit_cpu_APX_F (1u << 21)
/* CPUID_INDEX_19. */
@@ -332,6 +333,13 @@ enum
/* EBX. */
#define bit_cpu_PTWRITE (1u << 4)
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define bit_cpu_AVX10_XMM (1u << 16)
+#define bit_cpu_AVX10_YMM (1u << 17)
+#define bit_cpu_AVX10_ZMM (1u << 18)
+
/* CPUID_INDEX_1. */
/* ECX. */
@@ -563,6 +571,7 @@ enum
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1
/* CPUID_INDEX_19. */
@@ -576,6 +585,13 @@ enum
/* EBX. */
#define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0
+
/* CPUID_INDEX_1. */
/* ECX. */
@@ -809,6 +825,7 @@ enum
#define reg_AVX_NE_CONVERT edx
#define reg_AMX_COMPLEX edx
#define reg_PREFETCHI edx
+#define reg_AVX10 edx
#define reg_APX_F edx
/* CPUID_INDEX_19. */
@@ -822,6 +839,14 @@ enum
/* EBX. */
#define reg_PTWRITE ebx
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define reg_AVX10_XMM ebx
+#define reg_AVX10_YMM ebx
+#define reg_AVX10_ZMM ebx
+
+
/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
sequentially, then define the bit_arch* and index_arch_* lookup
constants. */
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b27fa73..44edd18 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -219,6 +219,7 @@ do_test (void)
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+ CHECK_CPU_FEATURE_PRESENT (AVX10);
CHECK_CPU_FEATURE_PRESENT (APX_F);
CHECK_CPU_FEATURE_PRESENT (AESKLE);
CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@ do_test (void)
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10);
CHECK_CPU_FEATURE_ACTIVE (APX_F);
CHECK_CPU_FEATURE_ACTIVE (AESKLE);
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
+ if (CPU_FEATURE_ACTIVE (AVX10))
+ {
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
+ }
return 0;
}