aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2018-12-03 05:54:43 -0800
committerH.J. Lu <hjl.tools@gmail.com>2018-12-03 05:54:56 -0800
commitc22e4c2a1431c5e77bf4288d35bf7629f2f093aa (patch)
tree666ae70088b899ad87a250a08b3be7a24bbc0157
parent7b36d26b22d147ffc347f427f9fd584700578a94 (diff)
downloadglibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.zip
glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.tar.gz
glibc-c22e4c2a1431c5e77bf4288d35bf7629f2f093aa.tar.bz2
x86: Extend CPUID support in struct cpu_features
Extend CPUID support for all feature bits from CPUID. Add a new macro, CPU_FEATURE_USABLE, which can be used to check if a feature is usable at run-time, instead of HAS_CPU_FEATURE and HAS_ARCH_FEATURE. Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them. Tested on i686 and x86-64 as well as using build-many-glibcs.py with x86 targets. * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for cpu_features_basic. (__cache_sysconf): Likewise. (init_cacheinfo): Likewise. * sysdeps/x86/cpu-features.c (get_extended_indices): Also populate COMMON_CPUID_INDEX_80000007 and COMMON_CPUID_INDEX_80000008. (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1. Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if XSAVEC is available. Set the bit_arch_XXX_Usable bits. (init_cpu_features): Use _Static_assert on index_arch_Fast_Unaligned_Load. __get_cpuid_registers and __get_arch_feature. Updated for cpu_features_basic. Set stepping in cpu_features. * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum. (FEATURE_INDEX_2): New. (FEATURE_INDEX_MAX): Changed to enum. (COMMON_CPUID_INDEX_D_ECX_1): New. (COMMON_CPUID_INDEX_80000007): Likewise. (COMMON_CPUID_INDEX_80000008): Likewise. (cpuid_registers): Likewise. (cpu_features_basic): Likewise. (CPU_FEATURE_USABLE): Likewise. (bit_arch_XXX_Usable): Likewise. (cpu_features): Use cpuid_registers and cpu_features_basic. (bit_arch_XXX): Rewritten. (bit_cpu_XXX): Likewise. (index_cpu_XXX): Likewise. (reg_XXX): Likewise. * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and <support/check.h>. (CHECK_CPU_FEATURE): New. (CHECK_CPU_FEATURE_USABLE): Likewise. (cpu_kinds): Likewise. (do_test): Print vendor, family, model and stepping. Check HAS_CPU_FEATURE and CPU_FEATURE_USABLE. (TEST_FUNCTION): Removed. Include <support/test-driver.c> instead of "../../test-skeleton.c". 
* sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount): Check POPCNT instead of POPCOUNT. * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
-rw-r--r--ChangeLog45
-rw-r--r--sysdeps/x86/cacheinfo.c20
-rw-r--r--sysdeps/x86/cpu-features.c141
-rw-r--r--sysdeps/x86/cpu-features.h1063
-rw-r--r--sysdeps/x86/tst-get-cpu-features.c264
-rw-r--r--sysdeps/x86_64/multiarch/sched_cpucount.c2
-rw-r--r--sysdeps/x86_64/multiarch/test-multiarch.c4
7 files changed, 1294 insertions, 245 deletions
diff --git a/ChangeLog b/ChangeLog
index 030c567..a11a9c6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,48 @@
+2018-12-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for
+ cpu_features_basic.
+ (__cache_sysconf): Likewise.
+ (init_cacheinfo): Likewise.
+ * sysdeps/x86/cpu-features.c (get_extended_indices): Also
+ populate COMMON_CPUID_INDEX_80000007 and
+ COMMON_CPUID_INDEX_80000008.
+ (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1.
+ Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
+ XSAVEC is available. Set the bit_arch_XXX_Usable bits.
+ (init_cpu_features): Use _Static_assert on
+ index_arch_Fast_Unaligned_Load.
+ __get_cpuid_registers and __get_arch_feature. Updated for
+ cpu_features_basic. Set stepping in cpu_features.
+ * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum.
+ (FEATURE_INDEX_2): New.
+ (FEATURE_INDEX_MAX): Changed to enum.
+ (COMMON_CPUID_INDEX_D_ECX_1): New.
+ (COMMON_CPUID_INDEX_80000007): Likewise.
+ (COMMON_CPUID_INDEX_80000008): Likewise.
+ (cpuid_registers): Likewise.
+ (cpu_features_basic): Likewise.
+ (CPU_FEATURE_USABLE): Likewise.
+ (bit_arch_XXX_Usable): Likewise.
+ (cpu_features): Use cpuid_registers and cpu_features_basic.
+ (bit_arch_XXX): Rewritten.
+ (bit_cpu_XXX): Likewise.
+ (index_cpu_XXX): Likewise.
+ (reg_XXX): Likewise.
+ * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and
+ <support/check.h>.
+ (CHECK_CPU_FEATURE): New.
+ (CHECK_CPU_FEATURE_USABLE): Likewise.
+ (cpu_kinds): Likewise.
+ (do_test): Print vendor, family, model and stepping. Check
+ HAS_CPU_FEATURE and CPU_FEATURE_USABLE.
+ (TEST_FUNCTION): Removed.
+ Include <support/test-driver.c> instead of
+ "../../test-skeleton.c".
+ * sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount):
+ Check POPCNT instead of POPCOUNT.
+ * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.
+
2018-12-03 Samuel Thibault <samuel.thibault@ens-lyon.org>
* scripts/gen-as-const.py (main): Avoid emitting empty line when
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index b9444dd..58f0a3c 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -205,8 +205,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
/* Intel reused this value. For family 15, model 6 it
specifies the 3rd level cache. Otherwise the 2nd
level cache. */
- unsigned int family = cpu_features->family;
- unsigned int model = cpu_features->model;
+ unsigned int family = cpu_features->basic.family;
+ unsigned int model = cpu_features->basic.model;
if (family == 15 && model == 6)
{
@@ -258,7 +258,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
- unsigned int maxidx = cpu_features->max_cpuid;
+ unsigned int maxidx = cpu_features->basic.max_cpuid;
/* Return -1 for older CPUs. */
if (maxidx < 2)
@@ -443,10 +443,10 @@ __cache_sysconf (int name)
{
const struct cpu_features *cpu_features = __get_cpu_features ();
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
return handle_intel (name, cpu_features);
- if (cpu_features->kind == arch_kind_amd)
+ if (cpu_features->basic.kind == arch_kind_amd)
return handle_amd (name);
// XXX Fill in more vendors.
@@ -497,9 +497,9 @@ init_cacheinfo (void)
unsigned int level;
unsigned int threads = 0;
const struct cpu_features *cpu_features = __get_cpu_features ();
- int max_cpuid = cpu_features->max_cpuid;
+ int max_cpuid = cpu_features->basic.max_cpuid;
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
@@ -538,8 +538,8 @@ init_cacheinfo (void)
highest cache level. */
if (max_cpuid >= 4)
{
- unsigned int family = cpu_features->family;
- unsigned int model = cpu_features->model;
+ unsigned int family = cpu_features->basic.family;
+ unsigned int model = cpu_features->basic.model;
int i = 0;
@@ -700,7 +700,7 @@ intel_bug_no_cache_info:
shared += core;
}
}
- else if (cpu_features->kind == arch_kind_amd)
+ else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 80b3054..5f9eefd 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -53,7 +53,18 @@ get_extended_indices (struct cpu_features *cpu_features)
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
-
+ if (eax >= 0x80000007)
+ __cpuid (0x80000007,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx);
+ if (eax >= 0x80000008)
+ __cpuid (0x80000008,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx);
}
static void
@@ -79,13 +90,20 @@ get_common_indices (struct cpu_features *cpu_features,
}
}
- if (cpu_features->max_cpuid >= 7)
+ if (cpu_features->basic.max_cpuid >= 7)
__cpuid_count (7, 0,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+ if (cpu_features->basic.max_cpuid >= 0xd)
+ __cpuid_count (0xd, 1,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx);
+
/* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
{
@@ -117,6 +135,18 @@ get_common_indices (struct cpu_features *cpu_features,
if (CPU_FEATURES_CPU_P (cpu_features, FMA))
cpu_features->feature[index_arch_FMA_Usable]
|= bit_arch_FMA_Usable;
+ /* Determine if VAES is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, VAES))
+ cpu_features->feature[index_arch_VAES_Usable]
+ |= bit_arch_VAES_Usable;
+ /* Determine if VPCLMULQDQ is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ))
+ cpu_features->feature[index_arch_VPCLMULQDQ_Usable]
+ |= bit_arch_VPCLMULQDQ_Usable;
+ /* Determine if XOP is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, XOP))
+ cpu_features->feature[index_arch_XOP_Usable]
+ |= bit_arch_XOP_Usable;
}
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
@@ -130,17 +160,69 @@ get_common_indices (struct cpu_features *cpu_features,
{
cpu_features->feature[index_arch_AVX512F_Usable]
|= bit_arch_AVX512F_Usable;
+ /* Determine if AVX512CD is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+ cpu_features->feature[index_arch_AVX512CD_Usable]
+ |= bit_arch_AVX512CD_Usable;
+ /* Determine if AVX512ER is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+ cpu_features->feature[index_arch_AVX512ER_Usable]
+ |= bit_arch_AVX512ER_Usable;
+ /* Determine if AVX512PF is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+ cpu_features->feature[index_arch_AVX512PF_Usable]
+ |= bit_arch_AVX512PF_Usable;
+ /* Determine if AVX512VL is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+ cpu_features->feature[index_arch_AVX512VL_Usable]
+ |= bit_arch_AVX512VL_Usable;
/* Determine if AVX512DQ is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
cpu_features->feature[index_arch_AVX512DQ_Usable]
|= bit_arch_AVX512DQ_Usable;
+ /* Determine if AVX512BW is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW))
+ cpu_features->feature[index_arch_AVX512BW_Usable]
+ |= bit_arch_AVX512BW_Usable;
+ /* Determine if AVX512_4FMAPS is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS))
+ cpu_features->feature[index_arch_AVX512_4FMAPS_Usable]
+ |= bit_arch_AVX512_4FMAPS_Usable;
+ /* Determine if AVX512_4VNNIW is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW))
+ cpu_features->feature[index_arch_AVX512_4VNNIW_Usable]
+ |= bit_arch_AVX512_4VNNIW_Usable;
+ /* Determine if AVX512_BITALG is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG))
+ cpu_features->feature[index_arch_AVX512_BITALG_Usable]
+ |= bit_arch_AVX512_BITALG_Usable;
+ /* Determine if AVX512_IFMA is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA))
+ cpu_features->feature[index_arch_AVX512_IFMA_Usable]
+ |= bit_arch_AVX512_IFMA_Usable;
+ /* Determine if AVX512_VBMI is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI))
+ cpu_features->feature[index_arch_AVX512_VBMI_Usable]
+ |= bit_arch_AVX512_VBMI_Usable;
+ /* Determine if AVX512_VBMI2 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2))
+ cpu_features->feature[index_arch_AVX512_VBMI2_Usable]
+ |= bit_arch_AVX512_VBMI2_Usable;
+ /* Determine if AVX512_VNNI is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI))
+ cpu_features->feature[index_arch_AVX512_VNNI_Usable]
+ |= bit_arch_AVX512_VNNI_Usable;
+ /* Determine if AVX512_VPOPCNTDQ is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
+ cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
+ |= bit_arch_AVX512_VPOPCNTDQ_Usable;
}
}
}
/* For _dl_runtime_resolve, set xsave_state_size to xsave area
size + integer register save size and align it to 64 bytes. */
- if (cpu_features->max_cpuid >= 0xd)
+ if (cpu_features->basic.max_cpuid >= 0xd)
{
unsigned int eax, ebx, ecx, edx;
@@ -155,10 +237,8 @@ get_common_indices (struct cpu_features *cpu_features,
cpu_features->xsave_state_full_size
= xsave_state_full_size;
- __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
-
/* Check if XSAVEC is available. */
- if ((eax & (1 << 1)) != 0)
+ if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
{
unsigned int xstate_comp_offsets[32];
unsigned int xstate_comp_sizes[32];
@@ -210,12 +290,25 @@ get_common_indices (struct cpu_features *cpu_features,
}
}
+_Static_assert (((index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Unaligned_Copy)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Prefer_PMINUB_for_stringop)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Slow_SSE4_2)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Rep_String)
+ && (index_arch_Fast_Unaligned_Load
+ == index_arch_Fast_Copy_Backward)),
+ "Incorrect index_arch_Fast_Unaligned_Load");
+
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
unsigned int ebx, ecx, edx;
unsigned int family = 0;
unsigned int model = 0;
+ unsigned int stepping = 0;
enum cpu_features_kind kind;
#if !HAS_CPUID
@@ -226,12 +319,12 @@ init_cpu_features (struct cpu_features *cpu_features)
}
#endif
- __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
+ __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
/* This spells out "GenuineIntel". */
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
{
- unsigned int extended_model, stepping;
+ unsigned int extended_model;
kind = arch_kind_intel;
@@ -270,15 +363,6 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x5d:
/* Unaligned load versions are faster than SSSE3
on Silvermont. */
-#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-#endif
cpu_features->feature[index_arch_Fast_Unaligned_Load]
|= (bit_arch_Fast_Unaligned_Load
| bit_arch_Fast_Unaligned_Copy
@@ -301,15 +385,6 @@ init_cpu_features (struct cpu_features *cpu_features)
case 0x2f:
/* Rep string instructions, unaligned load, unaligned copy,
and pminub are fast on Intel Core i3, i5 and i7. */
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-#endif
cpu_features->feature[index_arch_Fast_Rep_String]
|= (bit_arch_Fast_Rep_String
| bit_arch_Fast_Unaligned_Load
@@ -353,7 +428,7 @@ init_cpu_features (struct cpu_features *cpu_features)
/* This spells out "AuthenticAMD". */
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
{
- unsigned int extended_model, stepping;
+ unsigned int extended_model;
kind = arch_kind_amd;
@@ -375,9 +450,6 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x15)
{
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-#endif
/* "Excavator" */
if (model >= 0x60 && model <= 0x7f)
{
@@ -409,9 +481,10 @@ init_cpu_features (struct cpu_features *cpu_features)
no_cpuid:
#endif
- cpu_features->family = family;
- cpu_features->model = model;
- cpu_features->kind = kind;
+ cpu_features->basic.kind = kind;
+ cpu_features->basic.family = family;
+ cpu_features->basic.model = model;
+ cpu_features->basic.stepping = stepping;
#if HAVE_TUNABLES
TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
@@ -432,7 +505,7 @@ no_cpuid:
#ifdef __x86_64__
GLRO(dl_hwcap) = HWCAP_X86_64;
- if (cpu_features->kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel)
{
const char *platform = NULL;
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index fb22d7b..4917182 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -18,108 +18,58 @@
#ifndef cpu_features_h
#define cpu_features_h
-#define bit_arch_Fast_Rep_String (1 << 0)
-#define bit_arch_Fast_Copy_Backward (1 << 1)
-#define bit_arch_Slow_BSF (1 << 2)
-#define bit_arch_Fast_Unaligned_Load (1 << 4)
-#define bit_arch_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_arch_AVX_Usable (1 << 6)
-#define bit_arch_FMA_Usable (1 << 7)
-#define bit_arch_FMA4_Usable (1 << 8)
-#define bit_arch_Slow_SSE4_2 (1 << 9)
-#define bit_arch_AVX2_Usable (1 << 10)
-#define bit_arch_AVX_Fast_Unaligned_Load (1 << 11)
-#define bit_arch_AVX512F_Usable (1 << 12)
-#define bit_arch_AVX512DQ_Usable (1 << 13)
-#define bit_arch_I586 (1 << 14)
-#define bit_arch_I686 (1 << 15)
-#define bit_arch_Prefer_MAP_32BIT_EXEC (1 << 16)
-#define bit_arch_Prefer_No_VZEROUPPER (1 << 17)
-#define bit_arch_Fast_Unaligned_Copy (1 << 18)
-#define bit_arch_Prefer_ERMS (1 << 19)
-#define bit_arch_Prefer_No_AVX512 (1 << 20)
-#define bit_arch_MathVec_Prefer_No_AVX512 (1 << 21)
-#define bit_arch_XSAVEC_Usable (1 << 22)
-#define bit_arch_Prefer_FSRM (1 << 23)
-
-/* CPUID Feature flags. */
-
-/* COMMON_CPUID_INDEX_1. */
-#define bit_cpu_CX8 (1 << 8)
-#define bit_cpu_CMOV (1 << 15)
-#define bit_cpu_SSE (1 << 25)
-#define bit_cpu_SSE2 (1 << 26)
-#define bit_cpu_SSSE3 (1 << 9)
-#define bit_cpu_SSE4_1 (1 << 19)
-#define bit_cpu_SSE4_2 (1 << 20)
-#define bit_cpu_OSXSAVE (1 << 27)
-#define bit_cpu_AVX (1 << 28)
-#define bit_cpu_POPCOUNT (1 << 23)
-#define bit_cpu_FMA (1 << 12)
-#define bit_cpu_FMA4 (1 << 16)
-#define bit_cpu_HTT (1 << 28)
-#define bit_cpu_LZCNT (1 << 5)
-#define bit_cpu_MOVBE (1 << 22)
-#define bit_cpu_POPCNT (1 << 23)
-
-/* COMMON_CPUID_INDEX_7. */
-#define bit_cpu_BMI1 (1 << 3)
-#define bit_cpu_BMI2 (1 << 8)
-#define bit_cpu_ERMS (1 << 9)
-#define bit_cpu_RTM (1 << 11)
-#define bit_cpu_AVX2 (1 << 5)
-#define bit_cpu_AVX512F (1 << 16)
-#define bit_cpu_AVX512DQ (1 << 17)
-#define bit_cpu_AVX512PF (1 << 26)
-#define bit_cpu_AVX512ER (1 << 27)
-#define bit_cpu_AVX512CD (1 << 28)
-#define bit_cpu_AVX512BW (1 << 30)
-#define bit_cpu_AVX512VL (1u << 31)
-#define bit_cpu_IBT (1u << 20)
-#define bit_cpu_SHSTK (1u << 7)
-#define bit_cpu_FSRM (1 << 4)
-
-/* XCR0 Feature flags. */
-#define bit_XMM_state (1 << 1)
-#define bit_YMM_state (1 << 2)
-#define bit_Opmask_state (1 << 5)
-#define bit_ZMM0_15_state (1 << 6)
-#define bit_ZMM16_31_state (1 << 7)
+enum
+{
+ /* The integer bit array index for the first set of internal feature
+ bits. */
+ FEATURE_INDEX_1 = 0,
+ FEATURE_INDEX_2,
+ /* The current maximum size of the feature integer bit array. */
+ FEATURE_INDEX_MAX
+};
-/* The integer bit array index for the first set of internal feature bits. */
-#define FEATURE_INDEX_1 0
+enum
+{
+ COMMON_CPUID_INDEX_1 = 0,
+ COMMON_CPUID_INDEX_7,
+ COMMON_CPUID_INDEX_80000001,
+ COMMON_CPUID_INDEX_D_ECX_1,
+ COMMON_CPUID_INDEX_80000007,
+ COMMON_CPUID_INDEX_80000008,
+ /* Keep the following line at the end. */
+ COMMON_CPUID_INDEX_MAX
+};
-/* The current maximum size of the feature integer bit array. */
-#define FEATURE_INDEX_MAX 1
+struct cpuid_registers
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+};
-enum
- {
- COMMON_CPUID_INDEX_1 = 0,
- COMMON_CPUID_INDEX_7,
- COMMON_CPUID_INDEX_80000001,
- /* Keep the following line at the end. */
- COMMON_CPUID_INDEX_MAX
- };
+enum cpu_features_kind
+{
+ arch_kind_unknown = 0,
+ arch_kind_intel,
+ arch_kind_amd,
+ arch_kind_other
+};
-struct cpu_features
+struct cpu_features_basic
{
- enum cpu_features_kind
- {
- arch_kind_unknown = 0,
- arch_kind_intel,
- arch_kind_amd,
- arch_kind_other
- } kind;
+ enum cpu_features_kind kind;
int max_cpuid;
- struct cpuid_registers
- {
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- } cpuid[COMMON_CPUID_INDEX_MAX];
unsigned int family;
unsigned int model;
+ unsigned int stepping;
+};
+
+struct cpu_features
+{
+ struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
+ unsigned int feature[FEATURE_INDEX_MAX];
+ struct cpu_features_basic basic;
/* The state size for XSAVEC or XSAVE. The type must be unsigned long
int so that we use
@@ -132,7 +82,6 @@ struct cpu_features
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC_Usable
*/
unsigned int xsave_state_full_size;
- unsigned int feature[FEATURE_INDEX_MAX];
/* Data cache size for use in memory and string routines, typically
L1 size. */
unsigned long int data_cache_size;
@@ -148,112 +97,838 @@ struct cpu_features
extern const struct cpu_features *__get_cpu_features (void)
__attribute__ ((const));
-# if defined (_LIBC) && !IS_IN (nonlib)
-/* Unused for x86. */
-# define INIT_ARCH()
-# define __get_cpu_features() (&GLRO(dl_x86_cpu_features))
-# endif
-
-
/* Only used directly in cpu-features.c. */
# define CPU_FEATURES_CPU_P(ptr, name) \
((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
-/* HAS_* evaluates to true if we may use the feature at runtime. */
-# define HAS_CPU_FEATURE(name) \
- CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_CPU_FEATURE evaluates to true if CPU supports the feature. */
+#define HAS_CPU_FEATURE(name) \
+ CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_ARCH_FEATURE evaluates to true if we may use the feature at
+ runtime. */
# define HAS_ARCH_FEATURE(name) \
- CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
-
-# define index_cpu_CX8 COMMON_CPUID_INDEX_1
-# define index_cpu_CMOV COMMON_CPUID_INDEX_1
-# define index_cpu_SSE COMMON_CPUID_INDEX_1
-# define index_cpu_SSE2 COMMON_CPUID_INDEX_1
-# define index_cpu_SSSE3 COMMON_CPUID_INDEX_1
-# define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1
-# define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1
-# define index_cpu_AVX COMMON_CPUID_INDEX_1
-# define index_cpu_AVX2 COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512F COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512DQ COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512PF COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512ER COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512CD COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512BW COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512VL COMMON_CPUID_INDEX_7
-# define index_cpu_ERMS COMMON_CPUID_INDEX_7
-# define index_cpu_RTM COMMON_CPUID_INDEX_7
-# define index_cpu_FMA COMMON_CPUID_INDEX_1
-# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
-# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
-# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
-# define index_cpu_HTT COMMON_CPUID_INDEX_1
-# define index_cpu_BMI1 COMMON_CPUID_INDEX_7
-# define index_cpu_BMI2 COMMON_CPUID_INDEX_7
-# define index_cpu_LZCNT COMMON_CPUID_INDEX_80000001
-# define index_cpu_MOVBE COMMON_CPUID_INDEX_1
-# define index_cpu_POPCNT COMMON_CPUID_INDEX_1
-# define index_cpu_IBT COMMON_CPUID_INDEX_7
-# define index_cpu_SHSTK COMMON_CPUID_INDEX_7
-# define index_cpu_FSRM COMMON_CPUID_INDEX_7
-
-# define reg_CX8 edx
-# define reg_CMOV edx
-# define reg_SSE edx
-# define reg_SSE2 edx
-# define reg_SSSE3 ecx
-# define reg_SSE4_1 ecx
-# define reg_SSE4_2 ecx
-# define reg_AVX ecx
-# define reg_AVX2 ebx
-# define reg_AVX512F ebx
-# define reg_AVX512DQ ebx
-# define reg_AVX512PF ebx
-# define reg_AVX512ER ebx
-# define reg_AVX512CD ebx
-# define reg_AVX512BW ebx
-# define reg_AVX512VL ebx
-# define reg_ERMS ebx
-# define reg_RTM ebx
-# define reg_FMA ecx
-# define reg_FMA4 ecx
-# define reg_POPCOUNT ecx
-# define reg_OSXSAVE ecx
-# define reg_HTT edx
-# define reg_BMI1 ebx
-# define reg_BMI2 ebx
-# define reg_LZCNT ecx
-# define reg_MOVBE ecx
-# define reg_POPCNT ecx
-# define reg_IBT edx
-# define reg_SHSTK ecx
-# define reg_FSRM edx
-
-# define index_arch_Fast_Rep_String FEATURE_INDEX_1
-# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1
-# define index_arch_Slow_BSF FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
-# define index_arch_AVX_Usable FEATURE_INDEX_1
-# define index_arch_FMA_Usable FEATURE_INDEX_1
-# define index_arch_FMA4_Usable FEATURE_INDEX_1
-# define index_arch_Slow_SSE4_2 FEATURE_INDEX_1
-# define index_arch_AVX2_Usable FEATURE_INDEX_1
-# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_arch_AVX512F_Usable FEATURE_INDEX_1
-# define index_arch_AVX512DQ_Usable FEATURE_INDEX_1
-# define index_arch_I586 FEATURE_INDEX_1
-# define index_arch_I686 FEATURE_INDEX_1
-# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
-# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_1
-# define index_arch_Prefer_ERMS FEATURE_INDEX_1
-# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1
-# define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_1
-# define index_arch_XSAVEC_Usable FEATURE_INDEX_1
-# define index_arch_Prefer_FSRM FEATURE_INDEX_1
+ CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
+/* CPU_FEATURE_USABLE evaluates to true if the feature is usable. */
+#define CPU_FEATURE_USABLE(name) \
+ ((need_arch_feature_##name && HAS_ARCH_FEATURE (name##_Usable)) \
+ || (!need_arch_feature_##name && HAS_CPU_FEATURE(name)))
+
+/* Architecture features. */
+
+/* FEATURE_INDEX_1. */
+#define bit_arch_AVX_Usable (1u << 0)
+#define bit_arch_AVX2_Usable (1u << 1)
+#define bit_arch_AVX512F_Usable (1u << 2)
+#define bit_arch_AVX512CD_Usable (1u << 3)
+#define bit_arch_AVX512ER_Usable (1u << 4)
+#define bit_arch_AVX512PF_Usable (1u << 5)
+#define bit_arch_AVX512VL_Usable (1u << 6)
+#define bit_arch_AVX512DQ_Usable (1u << 7)
+#define bit_arch_AVX512BW_Usable (1u << 8)
+#define bit_arch_AVX512_4FMAPS_Usable (1u << 9)
+#define bit_arch_AVX512_4VNNIW_Usable (1u << 10)
+#define bit_arch_AVX512_BITALG_Usable (1u << 11)
+#define bit_arch_AVX512_IFMA_Usable (1u << 12)
+#define bit_arch_AVX512_VBMI_Usable (1u << 13)
+#define bit_arch_AVX512_VBMI2_Usable (1u << 14)
+#define bit_arch_AVX512_VNNI_Usable (1u << 15)
+#define bit_arch_AVX512_VPOPCNTDQ_Usable (1u << 16)
+#define bit_arch_FMA_Usable (1u << 17)
+#define bit_arch_FMA4_Usable (1u << 18)
+#define bit_arch_VAES_Usable (1u << 19)
+#define bit_arch_VPCLMULQDQ_Usable (1u << 20)
+#define bit_arch_XOP_Usable (1u << 21)
+#define bit_arch_XSAVEC_Usable (1u << 22)
+
+#define index_arch_AVX_Usable FEATURE_INDEX_1
+#define index_arch_AVX2_Usable FEATURE_INDEX_1
+#define index_arch_AVX512F_Usable FEATURE_INDEX_1
+#define index_arch_AVX512CD_Usable FEATURE_INDEX_1
+#define index_arch_AVX512ER_Usable FEATURE_INDEX_1
+#define index_arch_AVX512PF_Usable FEATURE_INDEX_1
+#define index_arch_AVX512VL_Usable FEATURE_INDEX_1
+#define index_arch_AVX512BW_Usable FEATURE_INDEX_1
+#define index_arch_AVX512DQ_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_4FMAPS_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_4VNNIW_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_BITALG_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_IFMA_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI2_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_VNNI_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_VPOPCNTDQ_Usable FEATURE_INDEX_1
+#define index_arch_FMA_Usable FEATURE_INDEX_1
+#define index_arch_FMA4_Usable FEATURE_INDEX_1
+#define index_arch_VAES_Usable FEATURE_INDEX_1
+#define index_arch_VPCLMULQDQ_Usable FEATURE_INDEX_1
+#define index_arch_XOP_Usable FEATURE_INDEX_1
+#define index_arch_XSAVEC_Usable FEATURE_INDEX_1
+
+/* Unused. Compiler will optimize them out. */
+#define bit_arch_SSE3_Usable (1u << 0)
+#define bit_arch_PCLMULQDQ_Usable (1u << 0)
+#define bit_arch_SSSE3_Usable (1u << 0)
+#define bit_arch_CMPXCHG16B_Usable (1u << 0)
+#define bit_arch_SSE4_1_Usable (1u << 0)
+#define bit_arch_SSE4_2_Usable (1u << 0)
+#define bit_arch_MOVBE_Usable (1u << 0)
+#define bit_arch_POPCNT_Usable (1u << 0)
+#define bit_arch_AES_Usable (1u << 0)
+#define bit_arch_XSAVE_Usable (1u << 0)
+#define bit_arch_OSXSAVE_Usable (1u << 0)
+#define bit_arch_F16C_Usable (1u << 0)
+#define bit_arch_RDRAND_Usable (1u << 0)
+#define bit_arch_FPU_Usable (1u << 0)
+#define bit_arch_TSC_Usable (1u << 0)
+#define bit_arch_MSR_Usable (1u << 0)
+#define bit_arch_CX8_Usable (1u << 0)
+#define bit_arch_SEP_Usable (1u << 0)
+#define bit_arch_CMOV_Usable (1u << 0)
+#define bit_arch_CLFSH_Usable (1u << 0)
+#define bit_arch_MMX_Usable (1u << 0)
+#define bit_arch_FXSR_Usable (1u << 0)
+#define bit_arch_SSE_Usable (1u << 0)
+#define bit_arch_SSE2_Usable (1u << 0)
+#define bit_arch_FSGSBASE_Usable (1u << 0)
+#define bit_arch_BMI1_Usable (1u << 0)
+#define bit_arch_HLE_Usable (1u << 0)
+#define bit_arch_BMI2_Usable (1u << 0)
+#define bit_arch_ERMS_Usable (1u << 0)
+#define bit_arch_RTM_Usable (1u << 0)
+#define bit_arch_RDSEED_Usable (1u << 0)
+#define bit_arch_ADX_Usable (1u << 0)
+#define bit_arch_CLFLUSHOPT_Usable (1u << 0)
+#define bit_arch_CLWB_Usable (1u << 0)
+#define bit_arch_SHA_Usable (1u << 0)
+#define bit_arch_PREFETCHWT1_Usable (1u << 0)
+#define bit_arch_GFNI_Usable (1u << 0)
+#define bit_arch_RDPID_Usable (1u << 0)
+#define bit_arch_CLDEMOTE_Usable (1u << 0)
+#define bit_arch_MOVDIRI_Usable (1u << 0)
+#define bit_arch_MOVDIR64B_Usable (1u << 0)
+#define bit_arch_FSRM_Usable (1u << 0)
+#define bit_arch_LAHF64_SAHF64_Usable (1u << 0)
+#define bit_arch_SVM_Usable (1u << 0)
+#define bit_arch_LZCNT_Usable (1u << 0)
+#define bit_arch_SSE4A_Usable (1u << 0)
+#define bit_arch_PREFETCHW_Usable (1u << 0)
+#define bit_arch_TBM_Usable (1u << 0)
+#define bit_arch_SYSCALL_SYSRET_Usable (1u << 0)
+#define bit_arch_RDTSCP_Usable (1u << 0)
+#define bit_arch_XSAVEOPT_Usable (1u << 0)
+#define bit_arch_XGETBV_ECX_1_Usable (1u << 0)
+#define bit_arch_XSAVES_Usable (1u << 0)
+#define bit_arch_INVARIANT_TSC_Usable (1u << 0)
+#define bit_arch_WBNOINVD_Usable (1u << 0)
+
+/* Unused. Compiler will optimize them out. */
+#define index_arch_SSE3_Usable FEATURE_INDEX_1
+#define index_arch_PCLMULQDQ_Usable FEATURE_INDEX_1
+#define index_arch_SSSE3_Usable FEATURE_INDEX_1
+#define index_arch_CMPXCHG16B_Usable FEATURE_INDEX_1
+#define index_arch_SSE4_1_Usable FEATURE_INDEX_1
+#define index_arch_SSE4_2_Usable FEATURE_INDEX_1
+#define index_arch_MOVBE_Usable FEATURE_INDEX_1
+#define index_arch_POPCNT_Usable FEATURE_INDEX_1
+#define index_arch_AES_Usable FEATURE_INDEX_1
+#define index_arch_XSAVE_Usable FEATURE_INDEX_1
+#define index_arch_OSXSAVE_Usable FEATURE_INDEX_1
+#define index_arch_F16C_Usable FEATURE_INDEX_1
+#define index_arch_RDRAND_Usable FEATURE_INDEX_1
+#define index_arch_FPU_Usable FEATURE_INDEX_1
+#define index_arch_TSC_Usable FEATURE_INDEX_1
+#define index_arch_MSR_Usable FEATURE_INDEX_1
+#define index_arch_CX8_Usable FEATURE_INDEX_1
+#define index_arch_SEP_Usable FEATURE_INDEX_1
+#define index_arch_CMOV_Usable FEATURE_INDEX_1
+#define index_arch_CLFSH_Usable FEATURE_INDEX_1
+#define index_arch_MMX_Usable FEATURE_INDEX_1
+#define index_arch_FXSR_Usable FEATURE_INDEX_1
+#define index_arch_SSE_Usable FEATURE_INDEX_1
+#define index_arch_SSE2_Usable FEATURE_INDEX_1
+#define index_arch_FSGSBASE_Usable FEATURE_INDEX_1
+#define index_arch_BMI1_Usable FEATURE_INDEX_1
+#define index_arch_HLE_Usable FEATURE_INDEX_1
+#define index_arch_BMI2_Usable FEATURE_INDEX_1
+#define index_arch_ERMS_Usable FEATURE_INDEX_1
+#define index_arch_RTM_Usable FEATURE_INDEX_1
+#define index_arch_RDSEED_Usable FEATURE_INDEX_1
+#define index_arch_ADX_Usable FEATURE_INDEX_1
+#define index_arch_CLFLUSHOPT_Usable FEATURE_INDEX_1
+#define index_arch_CLWB_Usable FEATURE_INDEX_1
+#define index_arch_SHA_Usable FEATURE_INDEX_1
+#define index_arch_PREFETCHWT1_Usable FEATURE_INDEX_1
+#define index_arch_GFNI_Usable FEATURE_INDEX_1
+#define index_arch_RDPID_Usable FEATURE_INDEX_1
+#define index_arch_CLDEMOTE_Usable FEATURE_INDEX_1
+#define index_arch_MOVDIRI_Usable FEATURE_INDEX_1
+#define index_arch_MOVDIR64B_Usable FEATURE_INDEX_1
+#define index_arch_FSRM_Usable FEATURE_INDEX_1
+#define index_arch_LAHF64_SAHF64_Usable FEATURE_INDEX_1
+#define index_arch_LZCNT_Usable FEATURE_INDEX_1
+#define index_arch_SSE4A_Usable FEATURE_INDEX_1
+#define index_arch_PREFETCHW_Usable FEATURE_INDEX_1
+#define index_arch_TBM_Usable FEATURE_INDEX_1
+#define index_arch_SYSCALL_SYSRET_Usable FEATURE_INDEX_1
+#define index_arch_RDTSCP_Usable FEATURE_INDEX_1
+#define index_arch_XSAVEOPT_Usable FEATURE_INDEX_1
+#define index_arch_XGETBV_ECX_1_Usable FEATURE_INDEX_1
+#define index_arch_XSAVES_Usable FEATURE_INDEX_1
+#define index_arch_INVARIANT_TSC_Usable FEATURE_INDEX_1
+#define index_arch_WBNOINVD_Usable FEATURE_INDEX_1
+
+/* COMMON_CPUID_INDEX_1. */
+
+/* ECX. */
+#define need_arch_feature_SSE3 0
+#define need_arch_feature_PCLMULQDQ 0
+#define need_arch_feature_SSSE3 0
+#define need_arch_feature_FMA 1
+#define need_arch_feature_CMPXCHG16B 0
+#define need_arch_feature_SSE4_1 0
+#define need_arch_feature_SSE4_2 0
+#define need_arch_feature_MOVBE 0
+#define need_arch_feature_POPCNT 0
+#define need_arch_feature_AES 0
+#define need_arch_feature_XSAVE 0
+#define need_arch_feature_OSXSAVE 0
+#define need_arch_feature_AVX 1
+#define need_arch_feature_F16C 0
+#define need_arch_feature_RDRAND 0
+
+/* EDX. */
+#define need_arch_feature_FPU 0
+#define need_arch_feature_TSC 0
+#define need_arch_feature_MSR 0
+#define need_arch_feature_CX8 0
+#define need_arch_feature_SEP 0
+#define need_arch_feature_CMOV 0
+#define need_arch_feature_CLFSH 0
+#define need_arch_feature_MMX 0
+#define need_arch_feature_FXSR 0
+#define need_arch_feature_SSE 0
+#define need_arch_feature_SSE2 0
+
+/* COMMON_CPUID_INDEX_7. */
+
+/* EBX. */
+#define need_arch_feature_FSGSBASE 0
+#define need_arch_feature_BMI1 0
+#define need_arch_feature_HLE 0
+#define need_arch_feature_AVX2 1
+#define need_arch_feature_BMI2 0
+#define need_arch_feature_ERMS 0
+#define need_arch_feature_RTM 0
+#define need_arch_feature_AVX512F 1
+#define need_arch_feature_AVX512DQ 1
+#define need_arch_feature_RDSEED 0
+#define need_arch_feature_ADX 0
+#define need_arch_feature_AVX512_IFMA 1
+#define need_arch_feature_CLFLUSHOPT 0
+#define need_arch_feature_CLWB 0
+#define need_arch_feature_AVX512PF 1
+#define need_arch_feature_AVX512ER 1
+#define need_arch_feature_AVX512CD 1
+#define need_arch_feature_SHA 0
+#define need_arch_feature_AVX512BW 1
+#define need_arch_feature_AVX512VL 1
+
+/* ECX. */
+#define need_arch_feature_PREFETCHWT1 0
+#define need_arch_feature_AVX512_VBMI 1
+#define need_arch_feature_AVX512_VBMI2 1
+#define need_arch_feature_GFNI 0
+#define need_arch_feature_VAES 1
+#define need_arch_feature_VPCLMULQDQ 1
+#define need_arch_feature_AVX512_VNNI 1
+#define need_arch_feature_AVX512_BITALG 1
+#define need_arch_feature_AVX512_VPOPCNTDQ 1
+#define need_arch_feature_RDPID 0
+#define need_arch_feature_CLDEMOTE 0
+#define need_arch_feature_MOVDIRI 0
+#define need_arch_feature_MOVDIR64B 0
+
+/* EDX. */
+#define need_arch_feature_AVX512_4VNNIW 1
+#define need_arch_feature_AVX512_4FMAPS 1
+#define need_arch_feature_FSRM 0
+
+/* COMMON_CPUID_INDEX_80000001. */
+
+/* ECX, except where a trailing comment names another leaf/register. */
+#define need_arch_feature_LAHF64_SAHF64 0
+#define need_arch_feature_LZCNT 0
+#define need_arch_feature_SSE4A 0
+#define need_arch_feature_PREFETCHW 0
+#define need_arch_feature_XOP 1
+#define need_arch_feature_FMA4 1
+#define need_arch_feature_TBM 0
+#define need_arch_feature_SYSCALL_SYSRET 0 /* COMMON_CPUID_INDEX_80000001, EDX. */
+#define need_arch_feature_RDTSCP 0 /* COMMON_CPUID_INDEX_80000001, EDX. */
+#define need_arch_feature_XSAVEOPT 0 /* COMMON_CPUID_INDEX_D_ECX_1, EAX. */
+#define need_arch_feature_XSAVEC 1 /* COMMON_CPUID_INDEX_D_ECX_1, EAX. */
+#define need_arch_feature_XGETBV_ECX_1 0 /* COMMON_CPUID_INDEX_D_ECX_1, EAX. */
+#define need_arch_feature_XSAVES 0 /* COMMON_CPUID_INDEX_D_ECX_1, EAX. */
+#define need_arch_feature_INVARIANT_TSC 0 /* COMMON_CPUID_INDEX_80000007, EDX. */
+#define need_arch_feature_WBNOINVD 0 /* COMMON_CPUID_INDEX_80000008, EBX. */
+
+/* CPU features. */
+
+/* COMMON_CPUID_INDEX_1. */
+
+/* ECX. */
+#define bit_cpu_SSE3 (1u << 0)
+#define bit_cpu_PCLMULQDQ (1u << 1)
+#define bit_cpu_DTES64 (1u << 2)
+#define bit_cpu_MONITOR (1u << 3)
+#define bit_cpu_DS_CPL (1u << 4)
+#define bit_cpu_VMX (1u << 5)
+#define bit_cpu_SMX (1u << 6)
+#define bit_cpu_EST (1u << 7)
+#define bit_cpu_TM2 (1u << 8)
+#define bit_cpu_SSSE3 (1u << 9)
+#define bit_cpu_CNXT_ID (1u << 10)
+#define bit_cpu_SDBG (1u << 11)
+#define bit_cpu_FMA (1u << 12)
+#define bit_cpu_CMPXCHG16B (1u << 13)
+#define bit_cpu_XTPRUPDCTRL (1u << 14)
+#define bit_cpu_PDCM (1u << 15)
+#define bit_cpu_PCID (1u << 17)
+#define bit_cpu_DCA (1u << 18)
+#define bit_cpu_SSE4_1 (1u << 19)
+#define bit_cpu_SSE4_2 (1u << 20)
+#define bit_cpu_X2APIC (1u << 21)
+#define bit_cpu_MOVBE (1u << 22)
+#define bit_cpu_POPCNT (1u << 23)
+#define bit_cpu_TSC_DEADLINE (1u << 24)
+#define bit_cpu_AES (1u << 25)
+#define bit_cpu_XSAVE (1u << 26)
+#define bit_cpu_OSXSAVE (1u << 27)
+#define bit_cpu_AVX (1u << 28)
+#define bit_cpu_F16C (1u << 29)
+#define bit_cpu_RDRAND (1u << 30)
+
+/* EDX. */
+#define bit_cpu_FPU (1u << 0)
+#define bit_cpu_VME (1u << 1)
+#define bit_cpu_DE (1u << 2)
+#define bit_cpu_PSE (1u << 3)
+#define bit_cpu_TSC (1u << 4)
+#define bit_cpu_MSR (1u << 5)
+#define bit_cpu_PAE (1u << 6)
+#define bit_cpu_MCE (1u << 7)
+#define bit_cpu_CX8 (1u << 8)
+#define bit_cpu_APIC (1u << 9)
+#define bit_cpu_SEP (1u << 11)
+#define bit_cpu_MTRR (1u << 12)
+#define bit_cpu_PGE (1u << 13)
+#define bit_cpu_MCA (1u << 14)
+#define bit_cpu_CMOV (1u << 15)
+#define bit_cpu_PAT (1u << 16)
+#define bit_cpu_PSE_36 (1u << 17)
+#define bit_cpu_PSN (1u << 18)
+#define bit_cpu_CLFSH (1u << 20)
+#define bit_cpu_DS (1u << 21)
+#define bit_cpu_ACPI (1u << 22)
+#define bit_cpu_MMX (1u << 23)
+#define bit_cpu_FXSR (1u << 24)
+#define bit_cpu_SSE (1u << 25)
+#define bit_cpu_SSE2 (1u << 26)
+#define bit_cpu_SS (1u << 27)
+#define bit_cpu_HTT (1u << 28)
+#define bit_cpu_TM (1u << 29)
+#define bit_cpu_PBE (1u << 31)
+
+/* COMMON_CPUID_INDEX_7. */
+
+/* EBX. */
+#define bit_cpu_FSGSBASE (1u << 0)
+#define bit_cpu_TSC_ADJUST (1u << 1)
+#define bit_cpu_SGX (1u << 2)
+#define bit_cpu_BMI1 (1u << 3)
+#define bit_cpu_HLE (1u << 4)
+#define bit_cpu_AVX2 (1u << 5)
+#define bit_cpu_SMEP (1u << 7)
+#define bit_cpu_BMI2 (1u << 8)
+#define bit_cpu_ERMS (1u << 9)
+#define bit_cpu_INVPCID (1u << 10)
+#define bit_cpu_RTM (1u << 11)
+#define bit_cpu_PQM (1u << 12)
+#define bit_cpu_MPX (1u << 14)
+#define bit_cpu_PQE (1u << 15)
+#define bit_cpu_AVX512F (1u << 16)
+#define bit_cpu_AVX512DQ (1u << 17)
+#define bit_cpu_RDSEED (1u << 18)
+#define bit_cpu_ADX (1u << 19)
+#define bit_cpu_SMAP (1u << 20)
+#define bit_cpu_AVX512_IFMA (1u << 21)
+#define bit_cpu_CLFLUSHOPT (1u << 22)
+#define bit_cpu_CLWB (1u << 24)
+#define bit_cpu_TRACE (1u << 25)
+#define bit_cpu_AVX512PF (1u << 26)
+#define bit_cpu_AVX512ER (1u << 27)
+#define bit_cpu_AVX512CD (1u << 28)
+#define bit_cpu_SHA (1u << 29)
+#define bit_cpu_AVX512BW (1u << 30)
+#define bit_cpu_AVX512VL (1u << 31)
+
+/* ECX. */
+#define bit_cpu_PREFETCHWT1 (1u << 0)
+#define bit_cpu_AVX512_VBMI (1u << 1)
+#define bit_cpu_UMIP (1u << 2)
+#define bit_cpu_PKU (1u << 3)
+#define bit_cpu_OSPKE (1u << 4)
+#define bit_cpu_WAITPKG (1u << 5)
+#define bit_cpu_AVX512_VBMI2 (1u << 6)
+#define bit_cpu_SHSTK (1u << 7)
+#define bit_cpu_GFNI (1u << 8)
+#define bit_cpu_VAES (1u << 9)
+#define bit_cpu_VPCLMULQDQ (1u << 10)
+#define bit_cpu_AVX512_VNNI (1u << 11)
+#define bit_cpu_AVX512_BITALG (1u << 12)
+#define bit_cpu_AVX512_VPOPCNTDQ (1u << 14)
+#define bit_cpu_RDPID (1u << 22)
+#define bit_cpu_CLDEMOTE (1u << 25)
+#define bit_cpu_MOVDIRI (1u << 27)
+#define bit_cpu_MOVDIR64B (1u << 28)
+#define bit_cpu_SGX_LC (1u << 30)
+
+/* EDX. */
+#define bit_cpu_AVX512_4VNNIW (1u << 2)
+#define bit_cpu_AVX512_4FMAPS (1u << 3)
+#define bit_cpu_FSRM (1u << 4)
+#define bit_cpu_PCONFIG (1u << 18)
+#define bit_cpu_IBT (1u << 20)
+#define bit_cpu_IBRS_IBPB (1u << 26)
+#define bit_cpu_STIBP (1u << 27)
+#define bit_cpu_CAPABILITIES (1u << 29)
+#define bit_cpu_SSBD (1u << 31)
+
+/* COMMON_CPUID_INDEX_80000001. */
+
+/* ECX. */
+#define bit_cpu_LAHF64_SAHF64 (1u << 0)
+#define bit_cpu_SVM (1u << 2)
+#define bit_cpu_LZCNT (1u << 5)
+#define bit_cpu_SSE4A (1u << 6)
+#define bit_cpu_PREFETCHW (1u << 8)
+#define bit_cpu_XOP (1u << 11)
+#define bit_cpu_LWP (1u << 15)
+#define bit_cpu_FMA4 (1u << 16)
+#define bit_cpu_TBM (1u << 21)
+
+/* EDX. */
+#define bit_cpu_SYSCALL_SYSRET (1u << 11)
+#define bit_cpu_NX (1u << 20)
+#define bit_cpu_PAGE1GB (1u << 26)
+#define bit_cpu_RDTSCP (1u << 27)
+#define bit_cpu_LM (1u << 29)
+
+/* COMMON_CPUID_INDEX_D_ECX_1. */
+
+/* EAX. */
+#define bit_cpu_XSAVEOPT (1u << 0)
+#define bit_cpu_XSAVEC (1u << 1)
+#define bit_cpu_XGETBV_ECX_1 (1u << 2)
+#define bit_cpu_XSAVES (1u << 3)
+
+/* COMMON_CPUID_INDEX_80000007. */
+
+/* EDX. */
+#define bit_cpu_INVARIANT_TSC (1u << 8)
+
+/* COMMON_CPUID_INDEX_80000008. */
+
+/* EBX. */
+#define bit_cpu_WBNOINVD (1u << 9)
+
+/* COMMON_CPUID_INDEX_1. */
+
+/* ECX. */
+#define index_cpu_SSE3 COMMON_CPUID_INDEX_1
+#define index_cpu_PCLMULQDQ COMMON_CPUID_INDEX_1
+#define index_cpu_DTES64 COMMON_CPUID_INDEX_1
+#define index_cpu_MONITOR COMMON_CPUID_INDEX_1
+#define index_cpu_DS_CPL COMMON_CPUID_INDEX_1
+#define index_cpu_VMX COMMON_CPUID_INDEX_1
+#define index_cpu_SMX COMMON_CPUID_INDEX_1
+#define index_cpu_EST COMMON_CPUID_INDEX_1
+#define index_cpu_TM2 COMMON_CPUID_INDEX_1
+#define index_cpu_SSSE3 COMMON_CPUID_INDEX_1
+#define index_cpu_CNXT_ID COMMON_CPUID_INDEX_1
+#define index_cpu_SDBG COMMON_CPUID_INDEX_1
+#define index_cpu_FMA COMMON_CPUID_INDEX_1
+#define index_cpu_CMPXCHG16B COMMON_CPUID_INDEX_1
+#define index_cpu_XTPRUPDCTRL COMMON_CPUID_INDEX_1
+#define index_cpu_PDCM COMMON_CPUID_INDEX_1
+#define index_cpu_PCID COMMON_CPUID_INDEX_1
+#define index_cpu_DCA COMMON_CPUID_INDEX_1
+#define index_cpu_SSE4_1 COMMON_CPUID_INDEX_1
+#define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1
+#define index_cpu_X2APIC COMMON_CPUID_INDEX_1
+#define index_cpu_MOVBE COMMON_CPUID_INDEX_1
+#define index_cpu_POPCNT COMMON_CPUID_INDEX_1
+#define index_cpu_TSC_DEADLINE COMMON_CPUID_INDEX_1
+#define index_cpu_AES COMMON_CPUID_INDEX_1
+#define index_cpu_XSAVE COMMON_CPUID_INDEX_1
+#define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
+#define index_cpu_AVX COMMON_CPUID_INDEX_1
+#define index_cpu_F16C COMMON_CPUID_INDEX_1
+#define index_cpu_RDRAND COMMON_CPUID_INDEX_1
+
+/* EDX. */
+#define index_cpu_FPU COMMON_CPUID_INDEX_1
+#define index_cpu_VME COMMON_CPUID_INDEX_1
+#define index_cpu_DE COMMON_CPUID_INDEX_1
+#define index_cpu_PSE COMMON_CPUID_INDEX_1
+#define index_cpu_TSC COMMON_CPUID_INDEX_1
+#define index_cpu_MSR COMMON_CPUID_INDEX_1
+#define index_cpu_PAE COMMON_CPUID_INDEX_1
+#define index_cpu_MCE COMMON_CPUID_INDEX_1
+#define index_cpu_CX8 COMMON_CPUID_INDEX_1
+#define index_cpu_APIC COMMON_CPUID_INDEX_1
+#define index_cpu_SEP COMMON_CPUID_INDEX_1
+#define index_cpu_MTRR COMMON_CPUID_INDEX_1
+#define index_cpu_PGE COMMON_CPUID_INDEX_1
+#define index_cpu_MCA COMMON_CPUID_INDEX_1
+#define index_cpu_CMOV COMMON_CPUID_INDEX_1
+#define index_cpu_PAT COMMON_CPUID_INDEX_1
+#define index_cpu_PSE_36 COMMON_CPUID_INDEX_1
+#define index_cpu_PSN COMMON_CPUID_INDEX_1
+#define index_cpu_CLFSH COMMON_CPUID_INDEX_1
+#define index_cpu_DS COMMON_CPUID_INDEX_1
+#define index_cpu_ACPI COMMON_CPUID_INDEX_1
+#define index_cpu_MMX COMMON_CPUID_INDEX_1
+#define index_cpu_FXSR COMMON_CPUID_INDEX_1
+#define index_cpu_SSE COMMON_CPUID_INDEX_1
+#define index_cpu_SSE2 COMMON_CPUID_INDEX_1
+#define index_cpu_SS COMMON_CPUID_INDEX_1
+#define index_cpu_HTT COMMON_CPUID_INDEX_1
+#define index_cpu_TM COMMON_CPUID_INDEX_1
+#define index_cpu_PBE COMMON_CPUID_INDEX_1
+
+/* COMMON_CPUID_INDEX_7. */
+
+/* EBX. */
+#define index_cpu_FSGSBASE COMMON_CPUID_INDEX_7
+#define index_cpu_TSC_ADJUST COMMON_CPUID_INDEX_7
+#define index_cpu_SGX COMMON_CPUID_INDEX_7
+#define index_cpu_BMI1 COMMON_CPUID_INDEX_7
+#define index_cpu_HLE COMMON_CPUID_INDEX_7
+#define index_cpu_AVX2 COMMON_CPUID_INDEX_7
+#define index_cpu_SMEP COMMON_CPUID_INDEX_7
+#define index_cpu_BMI2 COMMON_CPUID_INDEX_7
+#define index_cpu_ERMS COMMON_CPUID_INDEX_7
+#define index_cpu_INVPCID COMMON_CPUID_INDEX_7
+#define index_cpu_RTM COMMON_CPUID_INDEX_7
+#define index_cpu_PQM COMMON_CPUID_INDEX_7
+#define index_cpu_MPX COMMON_CPUID_INDEX_7
+#define index_cpu_PQE COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512F COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512DQ COMMON_CPUID_INDEX_7
+#define index_cpu_RDSEED COMMON_CPUID_INDEX_7
+#define index_cpu_ADX COMMON_CPUID_INDEX_7
+#define index_cpu_SMAP COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_IFMA COMMON_CPUID_INDEX_7
+#define index_cpu_CLFLUSHOPT COMMON_CPUID_INDEX_7
+#define index_cpu_CLWB COMMON_CPUID_INDEX_7
+#define index_cpu_TRACE COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512PF COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512ER COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512CD COMMON_CPUID_INDEX_7
+#define index_cpu_SHA COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512BW COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512VL COMMON_CPUID_INDEX_7
+
+/* ECX. */
+#define index_cpu_PREFETCHWT1 COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VBMI COMMON_CPUID_INDEX_7
+#define index_cpu_UMIP COMMON_CPUID_INDEX_7
+#define index_cpu_PKU COMMON_CPUID_INDEX_7
+#define index_cpu_OSPKE COMMON_CPUID_INDEX_7
+#define index_cpu_WAITPKG COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VBMI2 COMMON_CPUID_INDEX_7
+#define index_cpu_SHSTK COMMON_CPUID_INDEX_7
+#define index_cpu_GFNI COMMON_CPUID_INDEX_7
+#define index_cpu_VAES COMMON_CPUID_INDEX_7
+#define index_cpu_VPCLMULQDQ COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VNNI COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_BITALG COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VPOPCNTDQ COMMON_CPUID_INDEX_7
+#define index_cpu_RDPID COMMON_CPUID_INDEX_7
+#define index_cpu_CLDEMOTE COMMON_CPUID_INDEX_7
+#define index_cpu_MOVDIRI COMMON_CPUID_INDEX_7
+#define index_cpu_MOVDIR64B COMMON_CPUID_INDEX_7
+#define index_cpu_SGX_LC COMMON_CPUID_INDEX_7
+
+/* EDX. */
+#define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7
+#define index_cpu_FSRM COMMON_CPUID_INDEX_7
+#define index_cpu_PCONFIG COMMON_CPUID_INDEX_7
+#define index_cpu_IBT COMMON_CPUID_INDEX_7
+#define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7
+#define index_cpu_STIBP COMMON_CPUID_INDEX_7
+#define index_cpu_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_SSBD COMMON_CPUID_INDEX_7
+
+/* COMMON_CPUID_INDEX_80000001. */
+
+/* ECX. */
+#define index_cpu_LAHF64_SAHF64 COMMON_CPUID_INDEX_80000001
+#define index_cpu_SVM COMMON_CPUID_INDEX_80000001
+#define index_cpu_LZCNT COMMON_CPUID_INDEX_80000001
+#define index_cpu_SSE4A COMMON_CPUID_INDEX_80000001
+#define index_cpu_PREFETCHW COMMON_CPUID_INDEX_80000001
+#define index_cpu_XOP COMMON_CPUID_INDEX_80000001
+#define index_cpu_LWP COMMON_CPUID_INDEX_80000001
+#define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
+#define index_cpu_TBM COMMON_CPUID_INDEX_80000001
+
+/* EDX. */
+#define index_cpu_SYSCALL_SYSRET COMMON_CPUID_INDEX_80000001
+#define index_cpu_NX COMMON_CPUID_INDEX_80000001
+#define index_cpu_PAGE1GB COMMON_CPUID_INDEX_80000001
+#define index_cpu_RDTSCP COMMON_CPUID_INDEX_80000001
+#define index_cpu_LM COMMON_CPUID_INDEX_80000001
+
+/* COMMON_CPUID_INDEX_D_ECX_1. */
+
+/* EAX. */
+#define index_cpu_XSAVEOPT COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVEC COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XGETBV_ECX_1 COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVES COMMON_CPUID_INDEX_D_ECX_1
+
+/* COMMON_CPUID_INDEX_80000007. */
+
+/* EDX. */
+#define index_cpu_INVARIANT_TSC COMMON_CPUID_INDEX_80000007
+
+/* COMMON_CPUID_INDEX_80000008. */
+
+/* EBX. */
+#define index_cpu_WBNOINVD COMMON_CPUID_INDEX_80000008
+
+/* COMMON_CPUID_INDEX_1. */
+
+/* ECX. */
+#define reg_SSE3 ecx
+#define reg_PCLMULQDQ ecx
+#define reg_DTES64 ecx
+#define reg_MONITOR ecx
+#define reg_DS_CPL ecx
+#define reg_VMX ecx
+#define reg_SMX ecx
+#define reg_EST ecx
+#define reg_TM2 ecx
+#define reg_SSSE3 ecx
+#define reg_CNXT_ID ecx
+#define reg_SDBG ecx
+#define reg_FMA ecx
+#define reg_CMPXCHG16B ecx
+#define reg_XTPRUPDCTRL ecx
+#define reg_PDCM ecx
+#define reg_PCID ecx
+#define reg_DCA ecx
+#define reg_SSE4_1 ecx
+#define reg_SSE4_2 ecx
+#define reg_X2APIC ecx
+#define reg_MOVBE ecx
+#define reg_POPCNT ecx
+#define reg_TSC_DEADLINE ecx
+#define reg_AES ecx
+#define reg_XSAVE ecx
+#define reg_OSXSAVE ecx
+#define reg_AVX ecx
+#define reg_F16C ecx
+#define reg_RDRAND ecx
+
+/* EDX. */
+#define reg_FPU edx
+#define reg_VME edx
+#define reg_DE edx
+#define reg_PSE edx
+#define reg_TSC edx
+#define reg_MSR edx
+#define reg_PAE edx
+#define reg_MCE edx
+#define reg_CX8 edx
+#define reg_APIC edx
+#define reg_SEP edx
+#define reg_MTRR edx
+#define reg_PGE edx
+#define reg_MCA edx
+#define reg_CMOV edx
+#define reg_PAT edx
+#define reg_PSE_36 edx
+#define reg_PSN edx
+#define reg_CLFSH edx
+#define reg_DS edx
+#define reg_ACPI edx
+#define reg_MMX edx
+#define reg_FXSR edx
+#define reg_SSE edx
+#define reg_SSE2 edx
+#define reg_SS edx
+#define reg_HTT edx
+#define reg_TM edx
+#define reg_PBE edx
+
+/* COMMON_CPUID_INDEX_7. */
+
+/* EBX. */
+#define reg_FSGSBASE ebx
+#define reg_TSC_ADJUST ebx
+#define reg_SGX ebx
+#define reg_BMI1 ebx
+#define reg_HLE ebx
+#define reg_BMI2 ebx
+#define reg_AVX2 ebx
+#define reg_SMEP ebx
+#define reg_ERMS ebx
+#define reg_INVPCID ebx
+#define reg_RTM ebx
+#define reg_PQM ebx
+#define reg_MPX ebx
+#define reg_PQE ebx
+#define reg_AVX512F ebx
+#define reg_AVX512DQ ebx
+#define reg_RDSEED ebx
+#define reg_ADX ebx
+#define reg_SMAP ebx
+#define reg_AVX512_IFMA ebx
+#define reg_CLFLUSHOPT ebx
+#define reg_CLWB ebx
+#define reg_TRACE ebx
+#define reg_AVX512PF ebx
+#define reg_AVX512ER ebx
+#define reg_AVX512CD ebx
+#define reg_SHA ebx
+#define reg_AVX512BW ebx
+#define reg_AVX512VL ebx
+
+/* ECX. */
+#define reg_PREFETCHWT1 ecx
+#define reg_AVX512_VBMI ecx
+#define reg_UMIP ecx
+#define reg_PKU ecx
+#define reg_OSPKE ecx
+#define reg_WAITPKG ecx
+#define reg_AVX512_VBMI2 ecx
+#define reg_SHSTK ecx
+#define reg_GFNI ecx
+#define reg_VAES ecx
+#define reg_VPCLMULQDQ ecx
+#define reg_AVX512_VNNI ecx
+#define reg_AVX512_BITALG ecx
+#define reg_AVX512_VPOPCNTDQ ecx
+#define reg_RDPID ecx
+#define reg_CLDEMOTE ecx
+#define reg_MOVDIRI ecx
+#define reg_MOVDIR64B ecx
+#define reg_SGX_LC ecx
+
+/* EDX. */
+#define reg_AVX512_4VNNIW edx
+#define reg_AVX512_4FMAPS edx
+#define reg_FSRM edx
+#define reg_PCONFIG edx
+#define reg_IBT edx
+#define reg_IBRS_IBPB edx
+#define reg_STIBP edx
+#define reg_CAPABILITIES edx
+#define reg_SSBD edx
+
+/* COMMON_CPUID_INDEX_80000001. */
+
+/* ECX. */
+#define reg_LAHF64_SAHF64 ecx
+#define reg_SVM ecx
+#define reg_LZCNT ecx
+#define reg_SSE4A ecx
+#define reg_PREFETCHW ecx
+#define reg_XOP ecx
+#define reg_LWP ecx
+#define reg_FMA4 ecx
+#define reg_TBM ecx
+
+/* EDX. */
+#define reg_SYSCALL_SYSRET edx
+#define reg_NX edx
+#define reg_PAGE1GB edx
+#define reg_RDTSCP edx
+#define reg_LM edx
+
+/* COMMON_CPUID_INDEX_D_ECX_1. */
+
+/* EAX. */
+#define reg_XSAVEOPT eax
+#define reg_XSAVEC eax
+#define reg_XGETBV_ECX_1 eax
+#define reg_XSAVES eax
+
+/* COMMON_CPUID_INDEX_80000007. */
+
+/* EDX. */
+#define reg_INVARIANT_TSC edx
+
+/* COMMON_CPUID_INDEX_80000008. */
+
+/* EBX. */
+#define reg_WBNOINVD ebx
+
+/* FEATURE_INDEX_2. */
+#define bit_arch_I586 (1u << 0)
+#define bit_arch_I686 (1u << 1)
+#define bit_arch_Fast_Rep_String (1u << 2)
+#define bit_arch_Fast_Copy_Backward (1u << 3)
+#define bit_arch_Fast_Unaligned_Load (1u << 4)
+#define bit_arch_Fast_Unaligned_Copy (1u << 5)
+#define bit_arch_Slow_BSF (1u << 6)
+#define bit_arch_Slow_SSE4_2 (1u << 7)
+#define bit_arch_AVX_Fast_Unaligned_Load (1u << 8)
+#define bit_arch_Prefer_MAP_32BIT_EXEC (1u << 9)
+#define bit_arch_Prefer_PMINUB_for_stringop (1u << 10)
+#define bit_arch_Prefer_No_VZEROUPPER (1u << 11)
+#define bit_arch_Prefer_ERMS (1u << 12)
+#define bit_arch_Prefer_FSRM (1u << 13)
+#define bit_arch_Prefer_No_AVX512 (1u << 14)
+#define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15)
+
+#define index_arch_Fast_Rep_String FEATURE_INDEX_2
+#define index_arch_Fast_Copy_Backward FEATURE_INDEX_2
+#define index_arch_Slow_BSF FEATURE_INDEX_2
+#define index_arch_Fast_Unaligned_Load FEATURE_INDEX_2
+#define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_2
+#define index_arch_Fast_Unaligned_Copy FEATURE_INDEX_2
+#define index_arch_I586 FEATURE_INDEX_2
+#define index_arch_I686 FEATURE_INDEX_2
+#define index_arch_Slow_SSE4_2 FEATURE_INDEX_2
+#define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_2
+#define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_2
+#define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_2
+#define index_arch_Prefer_ERMS FEATURE_INDEX_2
+#define index_arch_Prefer_No_AVX512 FEATURE_INDEX_2
+#define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_2
+#define index_arch_Prefer_FSRM FEATURE_INDEX_2
+
+/* XCR0 Feature flags. */
+#define bit_XMM_state (1u << 1)
+#define bit_YMM_state (1u << 2)
+#define bit_Opmask_state (1u << 5)
+#define bit_ZMM0_15_state (1u << 6)
+#define bit_ZMM16_31_state (1u << 7)
+
+# if defined (_LIBC) && !IS_IN (nonlib)
+/* INIT_ARCH is unused for x86, so it expands to nothing. */
+# define INIT_ARCH()
+# define __get_cpu_features() (&GLRO(dl_x86_cpu_features)) /* Address of the dl_x86_cpu_features object reached through GLRO. */
+# define x86_get_cpuid_registers(i) /* Address of element I of that object's cpuid array. */ \
+ (&(GLRO(dl_x86_cpu_features).cpuid[i]))
+# endif
#ifdef __x86_64__
# define HAS_CPUID 1
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b2fac19..64a7fd6 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -17,15 +17,271 @@
<http://www.gnu.org/licenses/>. */
#include <stdlib.h>
+#include <stdio.h>
#include <cpu-features.h>
+#include <support/check.h>
+
+/* Print NAME on its own line if HAS_CPU_FEATURE (NAME) is true.
+   The body is wrapped in do ... while (0) so that an invocation
+   followed by a semicolon is exactly one statement, which keeps it
+   safe as the body of an unbraced if/else.  */
+#define CHECK_CPU_FEATURE(name) \
+  do \
+    { \
+      if (HAS_CPU_FEATURE (name)) \
+        printf (" " #name "\n"); \
+    } \
+  while (0)
+
+/* Print NAME on its own line if CPU_FEATURE_USABLE (NAME) is true.
+   Same single-statement wrapping as CHECK_CPU_FEATURE.  */
+#define CHECK_CPU_FEATURE_USABLE(name) \
+  do \
+    { \
+      if (CPU_FEATURE_USABLE (name)) \
+        printf (" " #name "\n"); \
+    } \
+  while (0)
+
+/* Vendor names printed on the "Vendor:" line, indexed by the
+   arch_kind_* value read from cpu_features->basic.kind.  Designated
+   initializers tie each string to its enumerator instead of relying
+   on the declaration order of the enum.  */
+static const char * const cpu_kinds[] =
+{
+  [arch_kind_unknown] = "Unknown",
+  [arch_kind_intel] = "Intel",
+  [arch_kind_amd] = "AMD",
+  [arch_kind_other] = "Other",
+};
static int
do_test (void)
{
- if (__get_cpu_features ()->kind == arch_kind_unknown)
- abort ();
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ switch (cpu_features->basic.kind)
+ {
+ case arch_kind_intel:
+ case arch_kind_amd:
+ case arch_kind_other:
+ printf ("Vendor: %s\n", cpu_kinds[cpu_features->basic.kind]);
+ printf ("Family: 0x%x\n", cpu_features->basic.family);
+ printf ("Model: 0x%x\n", cpu_features->basic.model);
+ printf ("Stepping: 0x%x\n", cpu_features->basic.stepping);
+ break;
+
+ default:
+ abort ();
+ }
+
+#ifdef __SSE2__
+ TEST_VERIFY_EXIT (HAS_CPU_FEATURE (SSE2));
+#endif
+
+ printf ("CPU features:\n");
+ CHECK_CPU_FEATURE (SSE3);
+ CHECK_CPU_FEATURE (PCLMULQDQ);
+ CHECK_CPU_FEATURE (DTES64);
+ CHECK_CPU_FEATURE (MONITOR);
+ CHECK_CPU_FEATURE (DS_CPL);
+ CHECK_CPU_FEATURE (VMX);
+ CHECK_CPU_FEATURE (SMX);
+ CHECK_CPU_FEATURE (EST);
+ CHECK_CPU_FEATURE (TM2);
+ CHECK_CPU_FEATURE (SSSE3);
+ CHECK_CPU_FEATURE (CNXT_ID);
+ CHECK_CPU_FEATURE (SDBG);
+ CHECK_CPU_FEATURE (FMA);
+ CHECK_CPU_FEATURE (CMPXCHG16B);
+ CHECK_CPU_FEATURE (XTPRUPDCTRL);
+ CHECK_CPU_FEATURE (PDCM);
+ CHECK_CPU_FEATURE (PCID);
+ CHECK_CPU_FEATURE (DCA);
+ CHECK_CPU_FEATURE (SSE4_1);
+ CHECK_CPU_FEATURE (SSE4_2);
+ CHECK_CPU_FEATURE (X2APIC);
+ CHECK_CPU_FEATURE (MOVBE);
+ CHECK_CPU_FEATURE (POPCNT);
+ CHECK_CPU_FEATURE (TSC_DEADLINE);
+ CHECK_CPU_FEATURE (AES);
+ CHECK_CPU_FEATURE (XSAVE);
+ CHECK_CPU_FEATURE (OSXSAVE);
+ CHECK_CPU_FEATURE (AVX);
+ CHECK_CPU_FEATURE (F16C);
+ CHECK_CPU_FEATURE (RDRAND);
+ CHECK_CPU_FEATURE (FPU);
+ CHECK_CPU_FEATURE (VME);
+ CHECK_CPU_FEATURE (DE);
+ CHECK_CPU_FEATURE (PSE);
+ CHECK_CPU_FEATURE (TSC);
+ CHECK_CPU_FEATURE (MSR);
+ CHECK_CPU_FEATURE (PAE);
+ CHECK_CPU_FEATURE (MCE);
+ CHECK_CPU_FEATURE (CX8);
+ CHECK_CPU_FEATURE (APIC);
+ CHECK_CPU_FEATURE (SEP);
+ CHECK_CPU_FEATURE (MTRR);
+ CHECK_CPU_FEATURE (PGE);
+ CHECK_CPU_FEATURE (MCA);
+ CHECK_CPU_FEATURE (CMOV);
+ CHECK_CPU_FEATURE (PAT);
+ CHECK_CPU_FEATURE (PSE_36);
+ CHECK_CPU_FEATURE (PSN);
+ CHECK_CPU_FEATURE (CLFSH);
+ CHECK_CPU_FEATURE (DS);
+ CHECK_CPU_FEATURE (ACPI);
+ CHECK_CPU_FEATURE (MMX);
+ CHECK_CPU_FEATURE (FXSR);
+ CHECK_CPU_FEATURE (SSE);
+ CHECK_CPU_FEATURE (SSE2);
+ CHECK_CPU_FEATURE (SS);
+ CHECK_CPU_FEATURE (HTT);
+ CHECK_CPU_FEATURE (TM);
+ CHECK_CPU_FEATURE (PBE);
+ CHECK_CPU_FEATURE (FSGSBASE);
+ CHECK_CPU_FEATURE (TSC_ADJUST);
+ CHECK_CPU_FEATURE (SGX);
+ CHECK_CPU_FEATURE (BMI1);
+ CHECK_CPU_FEATURE (HLE);
+ CHECK_CPU_FEATURE (AVX2);
+ CHECK_CPU_FEATURE (SMEP);
+ CHECK_CPU_FEATURE (BMI2);
+ CHECK_CPU_FEATURE (ERMS);
+ CHECK_CPU_FEATURE (INVPCID);
+ CHECK_CPU_FEATURE (RTM);
+ CHECK_CPU_FEATURE (PQM);
+ CHECK_CPU_FEATURE (MPX);
+ CHECK_CPU_FEATURE (PQE);
+ CHECK_CPU_FEATURE (AVX512F);
+ CHECK_CPU_FEATURE (AVX512DQ);
+ CHECK_CPU_FEATURE (RDSEED);
+ CHECK_CPU_FEATURE (ADX);
+ CHECK_CPU_FEATURE (SMAP);
+ CHECK_CPU_FEATURE (AVX512_IFMA);
+ CHECK_CPU_FEATURE (CLFLUSHOPT);
+ CHECK_CPU_FEATURE (CLWB);
+ CHECK_CPU_FEATURE (TRACE);
+ CHECK_CPU_FEATURE (AVX512PF);
+ CHECK_CPU_FEATURE (AVX512ER);
+ CHECK_CPU_FEATURE (AVX512CD);
+ CHECK_CPU_FEATURE (SHA);
+ CHECK_CPU_FEATURE (AVX512BW);
+ CHECK_CPU_FEATURE (AVX512VL);
+ CHECK_CPU_FEATURE (PREFETCHWT1);
+ CHECK_CPU_FEATURE (AVX512_VBMI);
+ CHECK_CPU_FEATURE (UMIP);
+ CHECK_CPU_FEATURE (PKU);
+ CHECK_CPU_FEATURE (OSPKE);
+ CHECK_CPU_FEATURE (WAITPKG);
+ CHECK_CPU_FEATURE (AVX512_VBMI2);
+ CHECK_CPU_FEATURE (SHSTK);
+ CHECK_CPU_FEATURE (GFNI);
+ CHECK_CPU_FEATURE (VAES);
+ CHECK_CPU_FEATURE (VPCLMULQDQ);
+ CHECK_CPU_FEATURE (AVX512_VNNI);
+ CHECK_CPU_FEATURE (AVX512_BITALG);
+ CHECK_CPU_FEATURE (AVX512_VPOPCNTDQ);
+ CHECK_CPU_FEATURE (RDPID);
+ CHECK_CPU_FEATURE (CLDEMOTE);
+ CHECK_CPU_FEATURE (MOVDIRI);
+ CHECK_CPU_FEATURE (MOVDIR64B);
+ CHECK_CPU_FEATURE (SGX_LC);
+ CHECK_CPU_FEATURE (AVX512_4VNNIW);
+ CHECK_CPU_FEATURE (AVX512_4FMAPS);
+ CHECK_CPU_FEATURE (FSRM);
+ CHECK_CPU_FEATURE (PCONFIG);
+ CHECK_CPU_FEATURE (IBT);
+ CHECK_CPU_FEATURE (IBRS_IBPB);
+ CHECK_CPU_FEATURE (STIBP);
+ CHECK_CPU_FEATURE (CAPABILITIES);
+ CHECK_CPU_FEATURE (SSBD);
+ CHECK_CPU_FEATURE (LAHF64_SAHF64);
+ CHECK_CPU_FEATURE (SVM);
+ CHECK_CPU_FEATURE (LZCNT);
+ CHECK_CPU_FEATURE (SSE4A);
+ CHECK_CPU_FEATURE (PREFETCHW);
+ CHECK_CPU_FEATURE (XOP);
+ CHECK_CPU_FEATURE (LWP);
+ CHECK_CPU_FEATURE (FMA4);
+ CHECK_CPU_FEATURE (TBM);
+ CHECK_CPU_FEATURE (SYSCALL_SYSRET);
+ CHECK_CPU_FEATURE (NX);
+ CHECK_CPU_FEATURE (PAGE1GB);
+ CHECK_CPU_FEATURE (RDTSCP);
+ CHECK_CPU_FEATURE (LM);
+ CHECK_CPU_FEATURE (XSAVEOPT);
+ CHECK_CPU_FEATURE (XSAVEC);
+ CHECK_CPU_FEATURE (XGETBV_ECX_1);
+ CHECK_CPU_FEATURE (XSAVES);
+ CHECK_CPU_FEATURE (INVARIANT_TSC);
+ CHECK_CPU_FEATURE (WBNOINVD);
+
+ printf ("Usable CPU features:\n");
+ CHECK_CPU_FEATURE_USABLE (SSE3);
+ CHECK_CPU_FEATURE_USABLE (PCLMULQDQ);
+ CHECK_CPU_FEATURE_USABLE (SSSE3);
+ CHECK_CPU_FEATURE_USABLE (FMA);
+ CHECK_CPU_FEATURE_USABLE (CMPXCHG16B);
+ CHECK_CPU_FEATURE_USABLE (SSE4_1);
+ CHECK_CPU_FEATURE_USABLE (SSE4_2);
+ CHECK_CPU_FEATURE_USABLE (MOVBE);
+ CHECK_CPU_FEATURE_USABLE (POPCNT);
+ CHECK_CPU_FEATURE_USABLE (AES);
+ CHECK_CPU_FEATURE_USABLE (XSAVE);
+ CHECK_CPU_FEATURE_USABLE (OSXSAVE);
+ CHECK_CPU_FEATURE_USABLE (AVX);
+ CHECK_CPU_FEATURE_USABLE (F16C);
+ CHECK_CPU_FEATURE_USABLE (RDRAND);
+ CHECK_CPU_FEATURE_USABLE (FPU);
+ CHECK_CPU_FEATURE_USABLE (TSC);
+ CHECK_CPU_FEATURE_USABLE (MSR);
+ CHECK_CPU_FEATURE_USABLE (CX8);
+ CHECK_CPU_FEATURE_USABLE (SEP);
+ CHECK_CPU_FEATURE_USABLE (CMOV);
+ CHECK_CPU_FEATURE_USABLE (CLFSH);
+ CHECK_CPU_FEATURE_USABLE (MMX);
+ CHECK_CPU_FEATURE_USABLE (FXSR);
+ CHECK_CPU_FEATURE_USABLE (SSE);
+ CHECK_CPU_FEATURE_USABLE (SSE2);
+ CHECK_CPU_FEATURE_USABLE (FSGSBASE);
+ CHECK_CPU_FEATURE_USABLE (BMI1);
+ CHECK_CPU_FEATURE_USABLE (HLE);
+ CHECK_CPU_FEATURE_USABLE (AVX2);
+ CHECK_CPU_FEATURE_USABLE (BMI2);
+ CHECK_CPU_FEATURE_USABLE (ERMS);
+ CHECK_CPU_FEATURE_USABLE (AVX512F);
+ CHECK_CPU_FEATURE_USABLE (AVX512DQ);
+ CHECK_CPU_FEATURE_USABLE (RDSEED);
+ CHECK_CPU_FEATURE_USABLE (ADX);
+ CHECK_CPU_FEATURE_USABLE (AVX512_IFMA);
+ CHECK_CPU_FEATURE_USABLE (CLFLUSHOPT);
+ CHECK_CPU_FEATURE_USABLE (CLWB);
+ CHECK_CPU_FEATURE_USABLE (AVX512PF);
+ CHECK_CPU_FEATURE_USABLE (AVX512ER);
+ CHECK_CPU_FEATURE_USABLE (AVX512CD);
+ CHECK_CPU_FEATURE_USABLE (SHA);
+ CHECK_CPU_FEATURE_USABLE (AVX512BW);
+ CHECK_CPU_FEATURE_USABLE (AVX512VL);
+ CHECK_CPU_FEATURE_USABLE (PREFETCHWT1);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VBMI);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VBMI2);
+ CHECK_CPU_FEATURE_USABLE (GFNI);
+ CHECK_CPU_FEATURE_USABLE (VAES);
+ CHECK_CPU_FEATURE_USABLE (VPCLMULQDQ);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VNNI);
+ CHECK_CPU_FEATURE_USABLE (AVX512_BITALG);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VPOPCNTDQ);
+ CHECK_CPU_FEATURE_USABLE (RDPID);
+ CHECK_CPU_FEATURE_USABLE (CLDEMOTE);
+ CHECK_CPU_FEATURE_USABLE (MOVDIRI);
+ CHECK_CPU_FEATURE_USABLE (MOVDIR64B);
+ CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
+ CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
+ CHECK_CPU_FEATURE_USABLE (FSRM);
+ CHECK_CPU_FEATURE_USABLE (LAHF64_SAHF64);
+ CHECK_CPU_FEATURE_USABLE (LZCNT);
+ CHECK_CPU_FEATURE_USABLE (SSE4A);
+ CHECK_CPU_FEATURE_USABLE (PREFETCHW);
+ CHECK_CPU_FEATURE_USABLE (XOP);
+ CHECK_CPU_FEATURE_USABLE (FMA4);
+ CHECK_CPU_FEATURE_USABLE (TBM);
+ CHECK_CPU_FEATURE_USABLE (SYSCALL_SYSRET);
+ CHECK_CPU_FEATURE_USABLE (RDTSCP);
+ CHECK_CPU_FEATURE_USABLE (XSAVEOPT);
+ CHECK_CPU_FEATURE_USABLE (XSAVEC);
+ CHECK_CPU_FEATURE_USABLE (XGETBV_ECX_1);
+ CHECK_CPU_FEATURE_USABLE (XSAVES);
+ CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
+ CHECK_CPU_FEATURE_USABLE (WBNOINVD);
+
return 0;
}
-#define TEST_FUNCTION do_test ()
-#include "../../test-skeleton.c"
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index d10d74a..7949119 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -33,4 +33,4 @@
#undef __sched_cpucount
libc_ifunc (__sched_cpucount,
- HAS_CPU_FEATURE (POPCOUNT) ? popcount_cpucount : generic_cpucount);
+ HAS_CPU_FEATURE (POPCNT) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
index aa872f2..417147c 100644
--- a/sysdeps/x86_64/multiarch/test-multiarch.c
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -85,8 +85,8 @@ do_test (int argc, char **argv)
, "HAS_CPU_FEATURE (SSE4_1)");
fails += check_proc ("ssse3", HAS_CPU_FEATURE (SSSE3),
"HAS_CPU_FEATURE (SSSE3)");
- fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCOUNT),
- "HAS_CPU_FEATURE (POPCOUNT)");
+ fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCNT),
+ "HAS_CPU_FEATURE (POPCNT)");
printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);