aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86/cpu-features.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r-- sysdeps/x86/cpu-features.c 436
1 files changed, 249 insertions, 187 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c7673a2..4c24ba7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -42,73 +42,109 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
#endif
static void
-get_extended_indices (struct cpu_features *cpu_features)
+update_usable (struct cpu_features *cpu_features)
{
- unsigned int eax, ebx, ecx, edx;
- __cpuid (0x80000000, eax, ebx, ecx, edx);
- if (eax >= 0x80000001)
- __cpuid (0x80000001,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
- if (eax >= 0x80000007)
- __cpuid (0x80000007,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx);
- if (eax >= 0x80000008)
- __cpuid (0x80000008,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx);
-}
-
-static void
-get_common_indices (struct cpu_features *cpu_features,
- unsigned int *family, unsigned int *model,
- unsigned int *extended_model, unsigned int *stepping)
-{
- if (family)
- {
- unsigned int eax;
- __cpuid (1, eax, cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].edx);
- cpu_features->cpuid[COMMON_CPUID_INDEX_1].eax = eax;
- *family = (eax >> 8) & 0x0f;
- *model = (eax >> 4) & 0x0f;
- *extended_model = (eax >> 12) & 0xf0;
- *stepping = eax & 0x0f;
- if (*family == 0x0f)
- {
- *family += (eax >> 20) & 0xff;
- *model += *extended_model;
- }
- }
-
- if (cpu_features->basic.max_cpuid >= 7)
- {
- __cpuid_count (7, 0,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
- __cpuid_count (7, 1,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].edx);
- }
-
- if (cpu_features->basic.max_cpuid >= 0xd)
- __cpuid_count (0xd, 1,
- cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx);
+ /* Before COMMON_CPUID_INDEX_80000001, copy the cpuid array elements to
+ the usable array. */
+ unsigned int i;
+ for (i = 0; i < COMMON_CPUID_INDEX_80000001; i++)
+ cpu_features->features[i].usable = cpu_features->features[i].cpuid;
+
+ /* Before COMMON_CPUID_INDEX_80000001, clear the unknown usable bits
+ and the always zero bits. */
+ CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_16);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_1_ECX_31);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_10);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_20);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_1_EDX_30);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_6);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EBX_22);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_13);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_15);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_16);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_23);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_24);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_ECX_26);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_0);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_1);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_5);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_19);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_21);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_23);
+
+ /* EAX/EBX from COMMON_CPUID_INDEX_1 and EAX from COMMON_CPUID_INDEX_7
+ aren't used for CPU feature detection. */
+ cpu_features->features[COMMON_CPUID_INDEX_1].usable.eax = 0;
+ cpu_features->features[COMMON_CPUID_INDEX_1].usable.ebx = 0;
+ cpu_features->features[COMMON_CPUID_INDEX_7].usable.eax = 0;
+
+ /* Starting from COMMON_CPUID_INDEX_80000001, copy the cpuid bits to
+ usable bits. */
+ CPU_FEATURE_SET_USABLE (cpu_features, LAHF64_SAHF64);
+ CPU_FEATURE_SET_USABLE (cpu_features, SVM);
+ CPU_FEATURE_SET_USABLE (cpu_features, LZCNT);
+ CPU_FEATURE_SET_USABLE (cpu_features, SSE4A);
+ CPU_FEATURE_SET_USABLE (cpu_features, PREFETCHW);
+ CPU_FEATURE_SET_USABLE (cpu_features, XOP);
+ CPU_FEATURE_SET_USABLE (cpu_features, LWP);
+ CPU_FEATURE_SET_USABLE (cpu_features, FMA4);
+ CPU_FEATURE_SET_USABLE (cpu_features, TBM);
+ CPU_FEATURE_SET_USABLE (cpu_features, SYSCALL_SYSRET);
+ CPU_FEATURE_SET_USABLE (cpu_features, NX);
+ CPU_FEATURE_SET_USABLE (cpu_features, PAGE1GB);
+ CPU_FEATURE_SET_USABLE (cpu_features, RDTSCP);
+ CPU_FEATURE_SET_USABLE (cpu_features, LM);
+ CPU_FEATURE_SET_USABLE (cpu_features, XSAVEOPT);
+ CPU_FEATURE_SET_USABLE (cpu_features, XSAVEC);
+ CPU_FEATURE_SET_USABLE (cpu_features, XGETBV_ECX_1);
+ CPU_FEATURE_SET_USABLE (cpu_features, XSAVES);
+ CPU_FEATURE_SET_USABLE (cpu_features, XFD);
+ CPU_FEATURE_SET_USABLE (cpu_features, INVARIANT_TSC);
+ CPU_FEATURE_SET_USABLE (cpu_features, WBNOINVD);
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16);
+
+ /* MPX has been deprecated. */
+ CPU_FEATURE_UNSET (cpu_features, MPX);
+
+ /* Clear the usable bits which require OS support. */
+ CPU_FEATURE_UNSET (cpu_features, FMA);
+ CPU_FEATURE_UNSET (cpu_features, AVX);
+ CPU_FEATURE_UNSET (cpu_features, F16C);
+ CPU_FEATURE_UNSET (cpu_features, AVX2);
+ CPU_FEATURE_UNSET (cpu_features, AVX512F);
+ CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
+ CPU_FEATURE_UNSET (cpu_features, AVX512PF);
+ CPU_FEATURE_UNSET (cpu_features, AVX512ER);
+ CPU_FEATURE_UNSET (cpu_features, AVX512CD);
+ CPU_FEATURE_UNSET (cpu_features, AVX512BW);
+ CPU_FEATURE_UNSET (cpu_features, AVX512VL);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
+ CPU_FEATURE_UNSET (cpu_features, PKU);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
+ CPU_FEATURE_UNSET (cpu_features, VAES);
+ CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
+ CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
+ CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
+ CPU_FEATURE_UNSET (cpu_features, AMX_INT8);
+ CPU_FEATURE_UNSET (cpu_features, XOP);
+ CPU_FEATURE_UNSET (cpu_features, FMA4);
+ CPU_FEATURE_UNSET (cpu_features, XSAVEC);
+ CPU_FEATURE_UNSET (cpu_features, XFD);
+ CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
/* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
@@ -123,40 +159,28 @@ get_common_indices (struct cpu_features *cpu_features,
/* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
{
- cpu_features->usable[index_arch_AVX_Usable]
- |= bit_arch_AVX_Usable;
+ CPU_FEATURE_SET (cpu_features, AVX);
/* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
- {
- cpu_features->usable[index_arch_AVX2_Usable]
- |= bit_arch_AVX2_Usable;
-
- /* Unaligned load with 256-bit AVX registers are faster on
- Intel/AMD processors with AVX2. */
- cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
- |= bit_arch_AVX_Fast_Unaligned_Load;
- }
+ {
+ CPU_FEATURE_SET (cpu_features, AVX2);
+
+ /* Unaligned loads with 256-bit AVX registers are faster
+ on Intel/AMD processors with AVX2. */
+ cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+ |= bit_arch_AVX_Fast_Unaligned_Load;
+ }
/* Determine if FMA is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, FMA))
- cpu_features->usable[index_arch_FMA_Usable]
- |= bit_arch_FMA_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, FMA);
/* Determine if VAES is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, VAES))
- cpu_features->usable[index_arch_VAES_Usable]
- |= bit_arch_VAES_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, VAES);
/* Determine if VPCLMULQDQ is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ))
- cpu_features->usable[index_arch_VPCLMULQDQ_Usable]
- |= bit_arch_VPCLMULQDQ_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, VPCLMULQDQ);
/* Determine if XOP is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, XOP))
- cpu_features->usable[index_arch_XOP_Usable]
- |= bit_arch_XOP_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, XOP);
/* Determine if F16C is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, F16C))
- cpu_features->usable[index_arch_F16C_Usable]
- |= bit_arch_F16C_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, F16C);
}
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
@@ -168,73 +192,41 @@ get_common_indices (struct cpu_features *cpu_features,
/* Determine if AVX512F is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
{
- cpu_features->usable[index_arch_AVX512F_Usable]
- |= bit_arch_AVX512F_Usable;
+ CPU_FEATURE_SET (cpu_features, AVX512F);
/* Determine if AVX512CD is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
- cpu_features->usable[index_arch_AVX512CD_Usable]
- |= bit_arch_AVX512CD_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512CD);
/* Determine if AVX512ER is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
- cpu_features->usable[index_arch_AVX512ER_Usable]
- |= bit_arch_AVX512ER_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512ER);
/* Determine if AVX512PF is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
- cpu_features->usable[index_arch_AVX512PF_Usable]
- |= bit_arch_AVX512PF_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512PF);
/* Determine if AVX512VL is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
- cpu_features->usable[index_arch_AVX512VL_Usable]
- |= bit_arch_AVX512VL_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512VL);
/* Determine if AVX512DQ is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
- cpu_features->usable[index_arch_AVX512DQ_Usable]
- |= bit_arch_AVX512DQ_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512DQ);
/* Determine if AVX512BW is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW))
- cpu_features->usable[index_arch_AVX512BW_Usable]
- |= bit_arch_AVX512BW_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512BW);
/* Determine if AVX512_4FMAPS is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS))
- cpu_features->usable[index_arch_AVX512_4FMAPS_Usable]
- |= bit_arch_AVX512_4FMAPS_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4FMAPS);
/* Determine if AVX512_4VNNIW is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW))
- cpu_features->usable[index_arch_AVX512_4VNNIW_Usable]
- |= bit_arch_AVX512_4VNNIW_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_4VNNIW);
/* Determine if AVX512_BITALG is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG))
- cpu_features->usable[index_arch_AVX512_BITALG_Usable]
- |= bit_arch_AVX512_BITALG_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BITALG);
/* Determine if AVX512_IFMA is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA))
- cpu_features->usable[index_arch_AVX512_IFMA_Usable]
- |= bit_arch_AVX512_IFMA_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_IFMA);
/* Determine if AVX512_VBMI is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI))
- cpu_features->usable[index_arch_AVX512_VBMI_Usable]
- |= bit_arch_AVX512_VBMI_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI);
/* Determine if AVX512_VBMI2 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2))
- cpu_features->usable[index_arch_AVX512_VBMI2_Usable]
- |= bit_arch_AVX512_VBMI2_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VBMI2);
/* Determine if is AVX512_VNNI usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI))
- cpu_features->usable[index_arch_AVX512_VNNI_Usable]
- |= bit_arch_AVX512_VNNI_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_VNNI);
/* Determine if AVX512_VPOPCNTDQ is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
- cpu_features->usable[index_arch_AVX512_VPOPCNTDQ_Usable]
- |= bit_arch_AVX512_VPOPCNTDQ_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features,
+ AVX512_VPOPCNTDQ);
/* Determine if AVX512_VP2INTERSECT is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features,
- AVX512_VP2INTERSECT))
- cpu_features->usable[index_arch_AVX512_VP2INTERSECT_Usable]
- |= bit_arch_AVX512_VP2INTERSECT_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features,
+ AVX512_VP2INTERSECT);
/* Determine if AVX512_BF16 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BF16))
- cpu_features->usable[index_arch_AVX512_BF16_Usable]
- |= bit_arch_AVX512_BF16_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AVX512_BF16);
}
}
}
@@ -244,19 +236,17 @@ get_common_indices (struct cpu_features *cpu_features,
== (bit_XTILECFG_state | bit_XTILEDATA_state))
{
/* Determine if AMX_BF16 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
- cpu_features->usable[index_arch_AMX_BF16_Usable]
- |= bit_arch_AMX_BF16_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AMX_BF16);
/* Determine if AMX_TILE is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
- cpu_features->usable[index_arch_AMX_TILE_Usable]
- |= bit_arch_AMX_TILE_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AMX_TILE);
/* Determine if AMX_INT8 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
- cpu_features->usable[index_arch_AMX_INT8_Usable]
- |= bit_arch_AMX_INT8_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, AMX_INT8);
}
+
+ /* XFD is usable only when OSXSAVE is enabled. */
+ CPU_FEATURE_SET_USABLE (cpu_features, XFD);
+
/* For _dl_runtime_resolve, set xsave_state_size to xsave area
size + integer register save size and align it to 64 bytes. */
if (cpu_features->basic.max_cpuid >= 0xd)
@@ -318,8 +308,7 @@ get_common_indices (struct cpu_features *cpu_features,
{
cpu_features->xsave_state_size
= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
- cpu_features->usable[index_arch_XSAVEC_Usable]
- |= bit_arch_XSAVEC_Usable;
+ CPU_FEATURE_SET (cpu_features, XSAVEC);
}
}
}
@@ -328,8 +317,79 @@ get_common_indices (struct cpu_features *cpu_features,
/* Determine if PKU is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
- cpu_features->usable[index_arch_PKU_Usable]
- |= bit_arch_PKU_Usable;
+ CPU_FEATURE_SET (cpu_features, PKU);
+}
+
+static void
+get_extended_indices (struct cpu_features *cpu_features) /* Store the registers of extended CPUID leaves 0x80000001, 0x80000007 and 0x80000008 in CPU_FEATURES.  */
+{
+ unsigned int eax, ebx, ecx, edx;
+ __cpuid (0x80000000, eax, ebx, ecx, edx); /* Leaf 0x80000000: EAX bounds the extended leaves read below; EBX/ECX/EDX are not used.  */
+ if (eax >= 0x80000001) /* Each leaf is read only when the bound in EAX covers it; otherwise this function leaves its cpuid slot unwritten.  */
+ __cpuid (0x80000001,
+ cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000001].cpuid.edx);
+ if (eax >= 0x80000007)
+ __cpuid (0x80000007,
+ cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000007].cpuid.edx);
+ if (eax >= 0x80000008)
+ __cpuid (0x80000008,
+ cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_80000008].cpuid.edx);
+}
+
+static void
+get_common_indices (struct cpu_features *cpu_features,
+ unsigned int *family, unsigned int *model,
+ unsigned int *extended_model, unsigned int *stepping) /* Read CPUID leaves 1, 7 (subleaves 0 and 1) and 0xd (subleaf 1) into CPU_FEATURES; decode the leaf 1 signature when FAMILY is non-NULL.  */
+{
+ if (family) /* FAMILY alone gates leaf 1: MODEL, EXTENDED_MODEL and STEPPING are written through without their own NULL checks.  */
+ {
+ unsigned int eax;
+ __cpuid (1, eax,
+ cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.edx);
+ cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.eax = eax;
+ *family = (eax >> 8) & 0x0f; /* EAX bits 11:8.  */
+ *model = (eax >> 4) & 0x0f; /* EAX bits 7:4.  */
+ *extended_model = (eax >> 12) & 0xf0; /* EAX bits 19:16, left in bits 7:4 of the result so it can be added to the model.  */
+ *stepping = eax & 0x0f; /* EAX bits 3:0.  */
+ if (*family == 0x0f) /* In this function only family 0xf folds in the extended fields.  */
+ {
+ *family += (eax >> 20) & 0xff; /* EAX bits 27:20.  */
+ *model += *extended_model;
+ }
+ }
+
+ if (cpu_features->basic.max_cpuid >= 7) /* Leaf 7 is read only when basic.max_cpuid reaches it.  */
+ {
+ __cpuid_count (7, 0,
+ cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_7].cpuid.edx);
+ __cpuid_count (7, 1, /* NOTE(review): subleaf 1 is queried without first checking the subleaf count from subleaf 0 -- confirm this is intended.  */
+ cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_7_ECX_1].cpuid.edx);
+ }
+
+ if (cpu_features->basic.max_cpuid >= 0xd) /* Likewise, leaf 0xd subleaf 1 is read only when basic.max_cpuid reaches 0xd.  */
+ __cpuid_count (0xd, 1,
+ cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.eax,
+ cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ebx,
+ cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.ecx,
+ cpu_features->features[COMMON_CPUID_INDEX_D_ECX_1].cpuid.edx);
+
}
_Static_assert (((index_arch_Fast_Unaligned_Load
@@ -353,8 +413,6 @@ init_cpu_features (struct cpu_features *cpu_features)
unsigned int stepping = 0;
enum cpu_features_kind kind;
- cpu_features->usable_p = cpu_features->usable;
-
#if !HAS_CPUID
if (__get_cpuid_max (0, 0) == 0)
{
@@ -377,6 +435,8 @@ init_cpu_features (struct cpu_features *cpu_features)
get_extended_indices (cpu_features);
+ update_usable (cpu_features);
+
if (family == 0x06)
{
model += extended_model;
@@ -473,7 +533,7 @@ init_cpu_features (struct cpu_features *cpu_features)
with stepping >= 4) to avoid TSX on kernels that weren't
updated with the latest microcode package (which disables
broken feature by default). */
- cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM;
+ CPU_FEATURE_UNSET (cpu_features, RTM);
break;
}
}
@@ -502,15 +562,15 @@ init_cpu_features (struct cpu_features *cpu_features)
get_extended_indices (cpu_features);
- ecx = cpu_features->cpuid[COMMON_CPUID_INDEX_1].ecx;
+ update_usable (cpu_features);
- if (HAS_ARCH_FEATURE (AVX_Usable))
+ ecx = cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ecx;
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
{
/* Since the FMA4 bit is in COMMON_CPUID_INDEX_80000001 and
FMA4 requires AVX, determine if FMA4 is usable here. */
- if (CPU_FEATURES_CPU_P (cpu_features, FMA4))
- cpu_features->usable[index_arch_FMA4_Usable]
- |= bit_arch_FMA4_Usable;
+ CPU_FEATURE_SET_USABLE (cpu_features, FMA4);
}
if (family == 0x15)
@@ -541,13 +601,15 @@ init_cpu_features (struct cpu_features *cpu_features)
get_extended_indices (cpu_features);
+ update_usable (cpu_features);
+
model += extended_model;
if (family == 0x6)
{
if (model == 0xf || model == 0x19)
{
- cpu_features->usable[index_arch_AVX_Usable]
- &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
+ CPU_FEATURE_UNSET (cpu_features, AVX);
+ CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
|= bit_arch_Slow_SSE4_2;
@@ -560,8 +622,8 @@ init_cpu_features (struct cpu_features *cpu_features)
{
if (model == 0x1b)
{
- cpu_features->usable[index_arch_AVX_Usable]
- &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
+ CPU_FEATURE_UNSET (cpu_features, AVX);
+ CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
|= bit_arch_Slow_SSE4_2;
@@ -571,8 +633,8 @@ init_cpu_features (struct cpu_features *cpu_features)
}
else if (model == 0x3b)
{
- cpu_features->usable[index_arch_AVX_Usable]
- &= ~(bit_arch_AVX_Usable | bit_arch_AVX2_Usable);
+ CPU_FEATURE_UNSET (cpu_features, AVX);
+ CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
@@ -583,6 +645,7 @@ init_cpu_features (struct cpu_features *cpu_features)
{
kind = arch_kind_other;
get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
+ update_usable (cpu_features);
}
/* Support i586 if CX8 is available. */
@@ -629,31 +692,30 @@ no_cpuid:
{
const char *platform = NULL;
- if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
- && CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
{
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
{
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
platform = "xeon_phi";
}
else
{
- if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW)
- && CPU_FEATURES_CPU_P (cpu_features, AVX512DQ)
- && CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
}
}
if (platform == NULL
- && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
- && CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
- && CPU_FEATURES_CPU_P (cpu_features, BMI1)
- && CPU_FEATURES_CPU_P (cpu_features, BMI2)
- && CPU_FEATURES_CPU_P (cpu_features, LZCNT)
- && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
- && CPU_FEATURES_CPU_P (cpu_features, POPCNT))
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, FMA)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
+ && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
+ && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
platform = "haswell";
if (platform != NULL)
@@ -661,7 +723,7 @@ no_cpuid:
}
#else
GLRO(dl_hwcap) = 0;
- if (CPU_FEATURES_CPU_P (cpu_features, SSE2))
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
GLRO(dl_hwcap) |= HWCAP_X86_SSE2;
if (CPU_FEATURES_ARCH_P (cpu_features, I686))
@@ -696,9 +758,9 @@ no_cpuid:
GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
*/
unsigned int cet_feature = 0;
- if (!HAS_CPU_FEATURE (IBT))
+ if (!CPU_FEATURE_USABLE (IBT))
cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
- if (!HAS_CPU_FEATURE (SHSTK))
+ if (!CPU_FEATURE_USABLE (SHSTK))
cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
if (cet_feature)