aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Goldstein <goldstein.w.n@gmail.com>2022-06-14 13:50:11 -0700
committerNoah Goldstein <goldstein.w.n@gmail.com>2022-06-14 20:58:07 -0700
commit035591551400cfc810b07244a015c9411e8bff7c (patch)
tree0bc9dfed85e4beb9b2735df6366b83f75dd1aa94
parent7374c02b683b7110b853a32496a619410364d70b (diff)
downloadglibc-035591551400cfc810b07244a015c9411e8bff7c.zip
glibc-035591551400cfc810b07244a015c9411e8bff7c.tar.gz
glibc-035591551400cfc810b07244a015c9411e8bff7c.tar.bz2
x86: Fix misordered logic for setting `rep_movsb_stop_threshold`
Move the setting of `rep_movsb_stop_threshold` to after the tunables have been collected so that the `rep_movsb_stop_threshold` (which is used to redirect control flow to the non_temporal case) will use any user value for `non_temporal_threshold` (set using glibc.cpu.x86_non_temporal_threshold)
-rw-r--r--sysdeps/x86/dl-cacheinfo.h24
1 files changed, 12 insertions, 12 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index f64a2fb..cc3b840 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -898,18 +898,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
rep_movsb_threshold = 2112;
- unsigned long int rep_movsb_stop_threshold;
- /* ERMS feature is implemented from AMD Zen3 architecture and it is
- performing poorly for data above L2 cache size. Henceforth, adding
- an upper bound threshold parameter to limit the usage of Enhanced
- REP MOVSB operations and setting its value to L2 cache size. */
- if (cpu_features->basic.kind == arch_kind_amd)
- rep_movsb_stop_threshold = core;
- /* Setting the upper bound of ERMS to the computed value of
- non-temporal threshold for architectures other than AMD. */
- else
- rep_movsb_stop_threshold = non_temporal_threshold;
-
/* The default threshold to use Enhanced REP STOSB. */
unsigned long int rep_stosb_threshold = 2048;
@@ -951,6 +939,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
SIZE_MAX);
#endif
+ unsigned long int rep_movsb_stop_threshold;
+ /* ERMS feature is implemented from AMD Zen3 architecture and it is
+ performing poorly for data above L2 cache size. Henceforth, adding
+ an upper bound threshold parameter to limit the usage of Enhanced
+ REP MOVSB operations and setting its value to L2 cache size. */
+ if (cpu_features->basic.kind == arch_kind_amd)
+ rep_movsb_stop_threshold = core;
+ /* Setting the upper bound of ERMS to the computed value of
+ non-temporal threshold for architectures other than AMD. */
+ else
+ rep_movsb_stop_threshold = non_temporal_threshold;
+
cpu_features->data_cache_size = data;
cpu_features->shared_cache_size = shared;
cpu_features->non_temporal_threshold = non_temporal_threshold;