aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Markus Trippelsdorf <markus@trippelsdorf.de> 2017-12-17 12:01:25 +0000
committer Markus Trippelsdorf <trippels@gcc.gnu.org> 2017-12-17 12:01:25 +0000
commit a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e (patch)
tree b25c6818a61fd8d5752c2aa8a73c912df16c2234 /gcc/config
parent d7f06bc3f7e1e1da11c065cc96a81f15bd0ca68f (diff)
download gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.zip
gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.tar.gz
gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.tar.bz2
Correct imul (r64) latency for modern Intel CPUs
Since Sandybridge the 64-bit multiplication latency is three cycles, not four. So update the costs to reflect reality.

	* x86-tune-costs.h (skylake_cost, core_cost): Decrease r64 multiply latencies.

	* gcc.target/i386/wmul-3.c: New test.

From-SVN: r255760
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/x86-tune-costs.h 9
1 file changed, 5 insertions, 4 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 64821933..477e478 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1538,8 +1538,8 @@ struct processor_costs skylake_cost = {
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
COSTS_N_INSNS (4), /* HI */
COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
/* Expanding div/mod currently doesn't consider parallelism. So the cost
model is not realistic. We compensate by increasing the latencies a bit. */
@@ -2341,8 +2341,9 @@ struct processor_costs core_cost = {
{COSTS_N_INSNS (3), /* cost of starting multiply for QI */
COSTS_N_INSNS (4), /* HI */
COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
+ /* Here we tune for Sandybridge or newer. */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
0, /* cost of multiply per each bit set */
/* Expanding div/mod currently doesn't consider parallelism. So the cost
model is not realistic. We compensate by increasing the latencies a bit. */