diff options
author | Noah Goldstein <goldstein.w.n@gmail.com> | 2023-09-20 15:44:50 -0500 |
---|---|---|
committer | Noah Goldstein <goldstein.w.n@gmail.com> | 2023-09-29 14:18:42 -0500 |
commit | d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff (patch) | |
tree | 0015cc47ecb9affe00f1d8afbebf4a5c16d20e25 | |
parent | 5f913506f4bf4785f9cf2c2ac8d17dc9f877ff17 (diff) | |
download | glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.zip glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.gz glibc-d90b43a4ed475dac5b0cd6e01ceb35c7b0f7f2ff.tar.bz2 |
x86: Add support for AVX10 preset and vec size in cpu-features
This commit add support for the new AVX10 cpu features:
https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf
We add checks for:
- `AVX10`: Check if AVX10 is present.
- `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.
`make check` passes and cpuid output was checked against GNR/DMR on an
emulator.
-rw-r--r-- | manual/platform.texi | 12 | ||||
-rw-r--r-- | sysdeps/x86/bits/platform/x86.h | 14 | ||||
-rw-r--r-- | sysdeps/x86/cpu-features.c | 25 | ||||
-rw-r--r-- | sysdeps/x86/include/cpu-features.h | 27 | ||||
-rw-r--r-- | sysdeps/x86/tst-get-cpu-features.c | 8 |
5 files changed, 83 insertions, 3 deletions
diff --git a/manual/platform.texi b/manual/platform.texi index 2a2d557..478b6fd 100644 --- a/manual/platform.texi +++ b/manual/platform.texi @@ -223,6 +223,18 @@ Leaf (EAX = 23H). @code{AVX} -- The AVX instruction extensions. @item +@code{AVX10} -- The AVX10 instruction extensions. + +@item +@code{AVX10_XMM} -- Whether AVX10 includes xmm registers. + +@item +@code{AVX10_YMM} -- Whether AVX10 includes ymm registers. + +@item +@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers. + +@item @code{AVX2} -- The AVX2 instruction extensions. @item diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h index 88ca071..1e23d53 100644 --- a/sysdeps/x86/bits/platform/x86.h +++ b/sysdeps/x86/bits/platform/x86.h @@ -30,7 +30,8 @@ enum CPUID_INDEX_80000008, CPUID_INDEX_7_ECX_1, CPUID_INDEX_19, - CPUID_INDEX_14_ECX_0 + CPUID_INDEX_14_ECX_0, + CPUID_INDEX_24_ECX_0 }; struct cpuid_feature @@ -312,6 +313,7 @@ enum x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5, x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8, x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14, + x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19, x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21, x86_cpu_index_19_ebx @@ -325,5 +327,13 @@ enum = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int) + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), - x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4 + x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4, + + x86_cpu_index_24_ecx_0_ebx + = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int) + + cpuid_register_index_ebx * 8 * sizeof (unsigned int)), + + x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16, + x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17, + x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18, }; diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index badf088..0bf923d 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features) CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK); #endif + enum + { + os_xmm = 1, + os_ymm = 2, + os_zmm = 4 + } os_vector_size = os_xmm; /* Can we call xgetbv? */ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) { unsigned int xcrlow; unsigned int xcrhigh; + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10); asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); /* Is YMM and XMM state usable? */ if ((xcrlow & (bit_YMM_state | bit_XMM_state)) @@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features) /* Determine if AVX is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX)) { + os_vector_size |= os_ymm; CPU_FEATURE_SET (cpu_features, AVX); /* The following features depend on AVX being usable. */ /* Determine if AVX2 is usable. */ @@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features) | bit_ZMM16_31_state)) == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) { + os_vector_size |= os_zmm; /* Determine if AVX512F is usable. */ if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) { @@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features) } } + if (CPU_FEATURES_CPU_P (cpu_features, AVX10) + && cpu_features->basic.max_cpuid >= 0x24) + { + __cpuid_count ( + 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx, + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx); + if (os_vector_size & os_xmm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM); + if (os_vector_size & os_ymm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM); + if (os_vector_size & os_zmm) + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM); + } + /* Are XTILECFG and XTILEDATA states usable? */ if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state)) == (bit_XTILECFG_state | bit_XTILEDATA_state)) diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h index eb30d34..2d7427a 100644 --- a/sysdeps/x86/include/cpu-features.h +++ b/sysdeps/x86/include/cpu-features.h @@ -29,7 +29,7 @@ enum { - CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1 + CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1 }; enum @@ -319,6 +319,7 @@ enum #define bit_cpu_AVX_NE_CONVERT (1u << 5) #define bit_cpu_AMX_COMPLEX (1u << 8) #define bit_cpu_PREFETCHI (1u << 14) +#define bit_cpu_AVX10 (1u << 19) #define bit_cpu_APX_F (1u << 21) /* CPUID_INDEX_19. */ @@ -332,6 +333,13 @@ enum /* EBX. */ #define bit_cpu_PTWRITE (1u << 4) +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define bit_cpu_AVX10_XMM (1u << 16) +#define bit_cpu_AVX10_YMM (1u << 17) +#define bit_cpu_AVX10_ZMM (1u << 18) + /* CPUID_INDEX_1. */ /* ECX. */ @@ -563,6 +571,7 @@ enum #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1 #define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1 #define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1 +#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1 #define index_cpu_APX_F CPUID_INDEX_7_ECX_1 /* CPUID_INDEX_19. */ @@ -576,6 +585,13 @@ enum /* EBX. */ #define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0 +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0 +#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0 +#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0 + /* CPUID_INDEX_1. */ /* ECX. */ @@ -809,6 +825,7 @@ enum #define reg_AVX_NE_CONVERT edx #define reg_AMX_COMPLEX edx #define reg_PREFETCHI edx +#define reg_AVX10 edx #define reg_APX_F edx /* CPUID_INDEX_19. */ @@ -822,6 +839,14 @@ enum /* EBX. */ #define reg_PTWRITE ebx +/* CPUID_INDEX_24_ECX_0. */ + +/* EBX. */ +#define reg_AVX10_XMM ebx +#define reg_AVX10_YMM ebx +#define reg_AVX10_ZMM ebx + + /* PREFERRED_FEATURE_INDEX_1. First define the bitindex values sequentially, then define the bit_arch* and index_arch_* lookup constants. */ diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c index b27fa73..44edd18 100644 --- a/sysdeps/x86/tst-get-cpu-features.c +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -219,6 +219,7 @@ do_test (void) CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT); CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX); CHECK_CPU_FEATURE_PRESENT (PREFETCHI); + CHECK_CPU_FEATURE_PRESENT (AVX10); CHECK_CPU_FEATURE_PRESENT (APX_F); CHECK_CPU_FEATURE_PRESENT (AESKLE); CHECK_CPU_FEATURE_PRESENT (WIDE_KL); @@ -391,11 +392,18 @@ do_test (void) CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT); CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX); CHECK_CPU_FEATURE_ACTIVE (PREFETCHI); + CHECK_CPU_FEATURE_ACTIVE (AVX10); CHECK_CPU_FEATURE_ACTIVE (APX_F); CHECK_CPU_FEATURE_ACTIVE (AESKLE); CHECK_CPU_FEATURE_ACTIVE (WIDE_KL); CHECK_CPU_FEATURE_ACTIVE (PTWRITE); + if (CPU_FEATURE_ACTIVE (AVX10)) + { + CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM); + CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM); + CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM); + } return 0; } |