aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/x86/cpu-features.c
diff options
context:
space:
mode:
authorMayShao-oc <MayShao-oc@zhaoxin.com>2024-06-29 11:58:26 +0800
committerH.J. Lu <hjl.tools@gmail.com>2024-06-30 06:26:43 -0700
commit44d757eb9f4484dbc3aa32042ab64cdf9374e093 (patch)
treebc12bd45006af2870317eb1b3f3d625bc9a6453b /sysdeps/x86/cpu-features.c
parent2f1f7a5f8aa7285e9a55b209dd01fdcb0d4e7281 (diff)
downloadglibc-44d757eb9f4484dbc3aa32042ab64cdf9374e093.zip
glibc-44d757eb9f4484dbc3aa32042ab64cdf9374e093.tar.gz
glibc-44d757eb9f4484dbc3aa32042ab64cdf9374e093.tar.bz2
x86: Set preferred CPU features on the KH-40000 and KX-7000 Zhaoxin processors
Fix code formatting under the Zhaoxin branch and add comments for the different Zhaoxin models. Unaligned AVX loads are slower on KH-40000 and KX-7000, so disable AVX_Fast_Unaligned_Load. Enable the Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to use the sse2_unaligned versions of memset, strcpy and strcat. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diffstat (limited to 'sysdeps/x86/cpu-features.c')
-rw-r--r--sysdeps/x86/cpu-features.c51
1 files changed, 35 insertions, 16 deletions
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 3d7c281..1927f65 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1023,39 +1023,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
model += extended_model;
if (family == 0x6)
- {
- if (model == 0xf || model == 0x19)
- {
+ {
+ /* Tuning for older Zhaoxin processors. */
+ if (model == 0xf || model == 0x19)
+ {
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
- cpu_features->preferred[index_arch_Slow_SSE4_2]
- |= bit_arch_Slow_SSE4_2;
+ cpu_features->preferred[index_arch_Slow_SSE4_2]
+ |= bit_arch_Slow_SSE4_2;
+ /* Unaligned AVX loads are slower. */
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
- }
- }
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
+ }
+ }
else if (family == 0x7)
- {
- if (model == 0x1b)
+ {
+ switch (model)
{
+ /* Wudaokou microarch tuning. */
+ case 0x1b:
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
- |= bit_arch_Slow_SSE4_2;
+ |= bit_arch_Slow_SSE4_2;
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
- }
- else if (model == 0x3b)
- {
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
+ break;
+
+ /* Lujiazui microarch tuning. */
+ case 0x3b:
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
+ break;
+
+ /* Yongfeng and Shijidadao microarch tuning. */
+ case 0x5b:
+ case 0x6b:
+ cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+ /* To use sse2_unaligned versions of memset, strcpy and strcat.
+ */
+ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+ |= (bit_arch_Prefer_No_VZEROUPPER
+ | bit_arch_Fast_Unaligned_Load);
+ break;
}
}
}