about | summary | refs | log | tree | commit | diff
path: root/sysdeps/x86/cpu-features.c
diff options
context:
space:
mode:
author: Carlos O'Donell <carlos@systemhalted.org> 2016-10-17 19:35:34 -0400
committer: Carlos O'Donell <carlos@systemhalted.org> 2016-10-17 19:39:54 -0400
commit: b3d17c1cf29ac8cd6b2fcd1db7d824e4af9ea505 (patch)
tree: 305ca99e65878ee08dfa197fc28eaa6b4182e7b3 /sysdeps/x86/cpu-features.c
parent: 6f322a894704e7959b9cee7c2cc7ec1963a26e73 (diff)
downloadglibc-b3d17c1cf29ac8cd6b2fcd1db7d824e4af9ea505.zip
glibc-b3d17c1cf29ac8cd6b2fcd1db7d824e4af9ea505.tar.gz
glibc-b3d17c1cf29ac8cd6b2fcd1db7d824e4af9ea505.tar.bz2
Bug 20689: Fix FMA and AVX2 detection on Intel
In the Intel Architecture Instruction Set Extensions Programming Reference, the recommended way to test for FMA, given in section '2.2.1 Detection of FMA', is: "Application Software must identify that hardware supports AVX as explained in ... after that it must also detect support for FMA..." We don't do that in glibc. We use the OSXSAVE bit to determine whether xgetbv may be used, and after that we check for AVX and FMA independently of each other. It is conceivable that a CPU could have the AVX bit clear and the FMA bit in an undefined state. This commit fixes FMA and AVX2 detection to depend on usable AVX, as required by the recommended Intel sequences. v1: https://www.sourceware.org/ml/libc-alpha/2016-10/msg00241.html v2: https://www.sourceware.org/ml/libc-alpha/2016-10/msg00265.html
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r-- sysdeps/x86/cpu-features.c | 24
1 file changed, 14 insertions, 10 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 11b9af2..e228a76 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -60,12 +60,20 @@ get_common_indeces (struct cpu_features *cpu_features,
{
/* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
- cpu_features->feature[index_arch_AVX_Usable]
- |= bit_arch_AVX_Usable;
- /* Determine if AVX2 is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
- cpu_features->feature[index_arch_AVX2_Usable]
- |= bit_arch_AVX2_Usable;
+ {
+ cpu_features->feature[index_arch_AVX_Usable]
+ |= bit_arch_AVX_Usable;
+ /* The following features depend on AVX being usable. */
+ /* Determine if AVX2 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
+ cpu_features->feature[index_arch_AVX2_Usable]
+ |= bit_arch_AVX2_Usable;
+ /* Determine if FMA is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, FMA))
+ cpu_features->feature[index_arch_FMA_Usable]
+ |= bit_arch_FMA_Usable;
+ }
+
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
ZMM16-ZMM31 state are enabled. */
if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
@@ -83,10 +91,6 @@ get_common_indeces (struct cpu_features *cpu_features,
|= bit_arch_AVX512DQ_Usable;
}
}
- /* Determine if FMA is usable. */
- if (CPU_FEATURES_CPU_P (cpu_features, FMA))
- cpu_features->feature[index_arch_FMA_Usable]
- |= bit_arch_FMA_Usable;
}
}
}