aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
author Adhemerval Zanella <adhemerval.zanella@linaro.org> 2024-02-08 10:08:39 -0300
committer Arjun Shankar <arjun@redhat.com> 2024-04-04 12:12:11 +0200
commit 6484a92698039c4a7a510f0214e22d067b0d78b3 (patch)
tree 98ec54b00f89ecb05fabafba078d296f238c3894 /sysdeps
parent aa4249266e9906c4bc833e4847f4d8feef59504f (diff)
download glibc-6484a92698039c4a7a510f0214e22d067b0d78b3.zip
glibc-6484a92698039c4a7a510f0214e22d067b0d78b3.tar.gz
glibc-6484a92698039c4a7a510f0214e22d067b0d78b3.tar.bz2
x86: Do not prefer ERMS for memset on Zen3+
For AMD Zen3+ architecture, the performance of the vectorized loop is
slightly better than ERMS.

Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

(cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567)
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/x86/dl-cacheinfo.h 5
1 files changed, 5 insertions, 0 deletions
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index f34d128..5a98f70 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
minimum value is fixed. */
rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
long int, NULL);
+ if (cpu_features->basic.kind == arch_kind_amd
+ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
+ /* For AMD Zen3+ architecture, the performance of the vectorized loop is
+ slightly better than ERMS. */
+ rep_stosb_threshold = SIZE_MAX;
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);