aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2025-04-15 19:04:15 +0200
committerJan Hubicka <hubicka@ucw.cz>2025-04-15 19:04:15 +0200
commite2011ab13de3e70774f869b356f5f9c750780b34 (patch)
tree7a7bfdbccdc149cfa40fda6c5aa5e0428d8e4dda /gcc
parent25775e73ea4d40a55a26b71c42cc6509caf4845f (diff)
downloadgcc-e2011ab13de3e70774f869b356f5f9c750780b34.zip
gcc-e2011ab13de3e70774f869b356f5f9c750780b34.tar.gz
gcc-e2011ab13de3e70774f869b356f5f9c750780b34.tar.bz2
Set ADDSS cost to 3 for znver5
Znver5 has an addss latency of 2 in the typical case, while all earlier versions have a latency of 3. Unfortunately, the addss cost is used to cost many other SSE instructions besides addss, and setting the cost to 2 makes us vectorize 4 64bit stores into one 256bit store, which in turn regresses imagemagick. This patch sets the cost back to 3. Next stage1 we can untie addss from the other operations and set it correctly. Bootstrapped/regtested x86_64-linux and also benchmarked on SPEC2k17. gcc/ChangeLog: PR target/119298 * config/i386/x86-tune-costs.h (znver5_cost): Set ADDSS cost to 3.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/x86-tune-costs.h2
1 files changed, 1 insertions, 1 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb73..9477345 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
/* ADDSS has throughput 2 and latency 2
(in some cases when source is another addition). */
- COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
/* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */