aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joe Damato <jdamato@fastly.com> 2024-06-07 23:04:47 +0000
committer Noah Goldstein <goldstein.w.n@gmail.com> 2024-06-10 16:18:18 -0500
commit bef2a827a55fc759693ccc5b0f614353b8ad712d (patch)
tree fbe0af5c5fa762b6086fd47efc8633d3ad1a4722
parent 5968125f55a3a3f3394e4ebe45e1f96d4864c576 (diff)
download glibc-bef2a827a55fc759693ccc5b0f614353b8ad712d.zip
glibc-bef2a827a55fc759693ccc5b0f614353b8ad712d.tar.gz
glibc-bef2a827a55fc759693ccc5b0f614353b8ad712d.tar.bz2
x86: Enable non-temporal memset tunable for AMD
In commit 46b5e98ef6f1 ("x86: Add seperate non-temporal tunable for memset") a tunable threshold for enabling non-temporal memset was added, but only for Intel hardware. Since that commit, new benchmark results suggest that non-temporal memset is beneficial on AMD, as well, so allow this tunable to be set for AMD. See: https://docs.google.com/spreadsheets/d/1opzukzvum4n6-RUVHTGddV6RjAEil4P2uMjjQGLbLcU/edit?usp=sharing which has been updated to include data using different strategies for large memset on AMD Zen2, Zen3, and Zen4. Signed-off-by: Joe Damato <jdamato@fastly.com> Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r-- sysdeps/x86/dl-cacheinfo.h 8
1 files changed, 4 insertions, 4 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index d375a7c..d2fe61b 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -986,11 +986,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
rep_movsb_threshold = 2112;
- /* Non-temporal stores in memset have only been tested on Intel hardware.
- Until we benchmark data on other x86 processor, disable non-temporal
- stores in memset. */
+ /* Non-temporal stores are more performant on Intel and AMD hardware above
+ non_temporal_threshold. Enable this for both Intel and AMD hardware. */
unsigned long int memset_non_temporal_threshold = SIZE_MAX;
- if (cpu_features->basic.kind == arch_kind_intel)
+ if (cpu_features->basic.kind == arch_kind_intel
+ || cpu_features->basic.kind == arch_kind_amd)
memset_non_temporal_threshold = non_temporal_threshold;
/* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of