diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2015-04-21 12:56:39 +0000 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@gcc.gnu.org> | 2015-04-21 12:56:39 +0000 |
commit | 35430ca0c6c34943932b111cb55475f292a0a208 (patch) | |
tree | 65294ecdcc9cb98ab5d1d0b01ea428adf669115c /gcc/expmed.c | |
parent | 0ff093d85530159d74f77edc0da5d8bd176fe953 (diff) | |
download | gcc-35430ca0c6c34943932b111cb55475f292a0a208.zip gcc-35430ca0c6c34943932b111cb55475f292a0a208.tar.gz gcc-35430ca0c6c34943932b111cb55475f292a0a208.tar.bz2 |
[expmed] Properly account for the cost and latency of shift+add ops when synthesizing mults
* expmed.c (synth_mult): Only assume overlapping
shift with previous steps in alg_sub_t_m2 case.
* gcc.target/aarch64/mult-synth_1.c: New test.
* gcc.target/aarch64/mult-synth_2.c: Likewise.
* gcc.target/aarch64/mult-synth_3.c: Likewise.
* gcc.target/aarch64/mult-synth_4.c: Likewise.
* gcc.target/aarch64/mult-synth_5.c: Likewise.
* gcc.target/aarch64/mult-synth_6.c: Likewise.
From-SVN: r222268
Diffstat (limited to 'gcc/expmed.c')
-rw-r--r-- | gcc/expmed.c | 55 |
1 file changed, 26 insertions, 29 deletions
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 6327629..6679f50 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -2664,14 +2664,28 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
           m = exact_log2 (-orig_t + 1);
           if (m >= 0 && m < maxm)
             {
-              op_cost = shiftsub1_cost (speed, mode, m);
+              op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
+              /* If the target has a cheap shift-and-subtract insn use
+                 that in preference to a shift insn followed by a sub insn.
+                 Assume that the shift-and-sub is "atomic" with a latency
+                 equal to it's cost, otherwise assume that on superscalar
+                 hardware the shift may be executed concurrently with the
+                 earlier steps in the algorithm.  */
+              if (shiftsub1_cost (speed, mode, m) <= op_cost)
+                {
+                  op_cost = shiftsub1_cost (speed, mode, m);
+                  op_latency = op_cost;
+                }
+              else
+                op_latency = add_cost (speed, mode);
+
               new_limit.cost = best_cost.cost - op_cost;
-              new_limit.latency = best_cost.latency - op_cost;
+              new_limit.latency = best_cost.latency - op_latency;
               synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
                           &new_limit, mode);
 
               alg_in->cost.cost += op_cost;
-              alg_in->cost.latency += op_cost;
+              alg_in->cost.latency += op_latency;
               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
                 {
                   best_cost = alg_in->cost;
@@ -2704,20 +2718,12 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
           if (t % d == 0 && t > d && m < maxm
               && (!cache_hit || cache_alg == alg_add_factor))
             {
-              /* If the target has a cheap shift-and-add instruction use
-                 that in preference to a shift insn followed by an add insn.
-                 Assume that the shift-and-add is "atomic" with a latency
-                 equal to its cost, otherwise assume that on superscalar
-                 hardware the shift may be executed concurrently with the
-                 earlier steps in the algorithm.  */
               op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
-              if (shiftadd_cost (speed, mode, m) < op_cost)
-                {
-                  op_cost = shiftadd_cost (speed, mode, m);
-                  op_latency = op_cost;
-                }
-              else
-                op_latency = add_cost (speed, mode);
+              if (shiftadd_cost (speed, mode, m) <= op_cost)
+                op_cost = shiftadd_cost (speed, mode, m);
+
+              op_latency = op_cost;
+
               new_limit.cost = best_cost.cost - op_cost;
               new_limit.latency = best_cost.latency - op_latency;
 
@@ -2742,20 +2748,11 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
           if (t % d == 0 && t > d && m < maxm
               && (!cache_hit || cache_alg == alg_sub_factor))
             {
-              /* If the target has a cheap shift-and-subtract insn use
-                 that in preference to a shift insn followed by a sub insn.
-                 Assume that the shift-and-sub is "atomic" with a latency
-                 equal to it's cost, otherwise assume that on superscalar
-                 hardware the shift may be executed concurrently with the
-                 earlier steps in the algorithm.  */
               op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
-              if (shiftsub0_cost (speed, mode, m) < op_cost)
-                {
-                  op_cost = shiftsub0_cost (speed, mode, m);
-                  op_latency = op_cost;
-                }
-              else
-                op_latency = add_cost (speed, mode);
+              if (shiftsub0_cost (speed, mode, m) <= op_cost)
+                op_cost = shiftsub0_cost (speed, mode, m);
+
+              op_latency = op_cost;
               new_limit.cost = best_cost.cost - op_cost;
               new_limit.latency = best_cost.latency - op_latency;
 