From 44d757eb9f4484dbc3aa32042ab64cdf9374e093 Mon Sep 17 00:00:00 2001
From: MayShao-oc
Date: Sat, 29 Jun 2024 11:58:26 +0800
Subject: x86: Set preferred CPU features on the KH-40000 and KX-7000
 Zhaoxin processors

Fix code formatting under the Zhaoxin branch and add comments for the
different Zhaoxin models.

Unaligned AVX loads are slower on the KH-40000 and KX-7000, so disable
AVX_Fast_Unaligned_Load on them.  Enable Prefer_No_VZEROUPPER and
Fast_Unaligned_Load so that the sse2_unaligned versions of memset,
strcpy and strcat are used.

Reviewed-by: Noah Goldstein
---
 sysdeps/x86/cpu-features.c | 51 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 3d7c281..1927f65 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1023,39 +1023,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
       model += extended_model;
       if (family == 0x6)
-        {
-          if (model == 0xf || model == 0x19)
-            {
+        {
+          /* Tuning for older Zhaoxin processors.  */
+          if (model == 0xf || model == 0x19)
+            {
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
-              cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+              cpu_features->preferred[index_arch_Slow_SSE4_2]
+                  |= bit_arch_Slow_SSE4_2;
 
+              /* Unaligned AVX loads are slower.  */
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-        }
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+            }
+        }
       else if (family == 0x7)
-        {
-          if (model == 0x1b)
+        {
+          switch (model)
             {
+              /* Wudaokou microarch tuning.  */
+            case 0x1b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
               cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+                  |= bit_arch_Slow_SSE4_2;
 
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-          else if (model == 0x3b)
-            {
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Lujiazui microarch tuning.  */
+            case 0x3b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Yongfeng and Shijidadao microarch tuning.  */
+            case 0x5b:
+            case 0x6b:
+              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+              /* To use sse2_unaligned versions of memset, strcpy and
+                 strcat.  */
+              cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+                  |= (bit_arch_Prefer_No_VZEROUPPER
+                      | bit_arch_Fast_Unaligned_Load);
+              break;
             }
         }
     }
 
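For reference, a minimal standalone sketch (not glibc code) of how the
family/model values tested in this patch are derived from CPUID leaf 1.
glibc keeps extended_model pre-shifted, so its "model += extended_model"
yields the same model computed below; the real code also matches the
Zhaoxin vendor string before taking this branch, which is omitted here,
and which of model 0x5b/0x6b is the KH-40000 versus the KX-7000 is not
spelled out in the patch, so the sketch does not assert a mapping.

/* Sketch only: decode family/model as glibc's Zhaoxin branch sees them.  */
#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 1;

  unsigned int family = (eax >> 8) & 0xf;
  /* Base model plus extended model; glibc stores extended_model already
     shifted left by 4, hence its plain "model += extended_model".  */
  unsigned int model = ((eax >> 4) & 0xf) + (((eax >> 16) & 0xf) << 4);

  printf ("family 0x%x, model 0x%x\n", family, model);

  /* Family 0x7, model 0x5b or 0x6b: the KH-40000/KX-7000 parts handled
     by the new case labels in the patch.  */
  if (family == 0x7 && (model == 0x5b || model == 0x6b))
    puts ("KH-40000/KX-7000 preferred-feature tuning would apply");
  return 0;
}

Built with e.g. "gcc -O2 cpuid-model.c" (the file name is illustrative),
this should report family 0x7 and model 0x5b or 0x6b on the affected
machines.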