diff options
author | Markus Trippelsdorf <markus@trippelsdorf.de> | 2017-12-17 12:01:25 +0000 |
---|---|---|
committer | Markus Trippelsdorf <trippels@gcc.gnu.org> | 2017-12-17 12:01:25 +0000 |
commit | a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e (patch) | |
tree | b25c6818a61fd8d5752c2aa8a73c912df16c2234 /gcc/config | |
parent | d7f06bc3f7e1e1da11c065cc96a81f15bd0ca68f (diff) | |
download | gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.zip gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.tar.gz gcc-a2ef9558d17aeb038cbc8a66a203f7a8e6c6e81e.tar.bz2 |
Correct imul (r64) latency for modern Intel CPUs
Since Sandy Bridge, the 64-bit multiplication latency is three cycles, not
four, so update the costs to reflect reality.
* x86-tune-costs.h (skylake_cost, core_cost): Decrease r64 multiply
latencies.
* gcc.target/i386/wmul-3.c: New test.
From-SVN: r255760
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/x86-tune-costs.h | 9 |
1 file changed, 5 insertions, 4 deletions
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 64821933..477e478 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1538,8 +1538,8 @@ struct processor_costs skylake_cost = {
   {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
    COSTS_N_INSNS (4), /* HI */
    COSTS_N_INSNS (3), /* SI */
-   COSTS_N_INSNS (4), /* DI */
-   COSTS_N_INSNS (4)}, /* other */
+   COSTS_N_INSNS (3), /* DI */
+   COSTS_N_INSNS (3)}, /* other */
   0, /* cost of multiply per each bit set */
   /* Expanding div/mod currently doesn't consider parallelism. So the cost
      model is not realistic. We compensate by increasing the latencies a bit. */
@@ -2341,8 +2341,9 @@ struct processor_costs core_cost = {
   {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
    COSTS_N_INSNS (4), /* HI */
    COSTS_N_INSNS (3), /* SI */
-   COSTS_N_INSNS (4), /* DI */
-   COSTS_N_INSNS (4)}, /* other */
+   /* Here we tune for Sandybridge or newer. */
+   COSTS_N_INSNS (3), /* DI */
+   COSTS_N_INSNS (3)}, /* other */
   0, /* cost of multiply per each bit set */
   /* Expanding div/mod currently doesn't consider parallelism. So the cost
      model is not realistic. We compensate by increasing the latencies a bit. */