aboutsummaryrefslogtreecommitdiff
path: root/gcc/expmed.c
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2008-08-31 11:44:25 +0200
committerJan Hubicka <hubicka@gcc.gnu.org>2008-08-31 09:44:25 +0000
commitf40751dd3417bc2b10d85a8f6afa3771c6de7101 (patch)
tree09cc5dfcaa392f3934a1edc839c165e824b37e01 /gcc/expmed.c
parent6aebac53f319408518584e434538b66bb8f03176 (diff)
downloadgcc-f40751dd3417bc2b10d85a8f6afa3771c6de7101.zip
gcc-f40751dd3417bc2b10d85a8f6afa3771c6de7101.tar.gz
gcc-f40751dd3417bc2b10d85a8f6afa3771c6de7101.tar.bz2
fwprop.c (should_replace_address): Add speed attribute.
* fwprop.c (should_replace_address): Add speed attribute. (PR_OPTIMIZE_FOR_SPEED): New flag. (propagate_rtx_1): Use it. (propagate_rtx): Set it. (try_fwprop_subst): Update call of rtx_costs. (forward_propagate_and_simplify): LIkewise. * hooks.c (hook_int_rtx_bool_0): New (hook_bool_rtx_int_int_intp_false): Replace by ... (hook_bool_rtx_int_int_intp_bool_false): .. thisone. * hooks.h (hook_int_rtx_bool_0): New (hook_bool_rtx_int_int_intp_false): Replace by ... (hook_bool_rtx_int_int_intp_bool_false): .. thisone. * optabs.c (avoid_expensive_constant): UPdate call of rtx_cost. (prepare_cmp_insn): UPdate call of rtx_cost. * postreload.c (reload_cse_simplify_set): Update call of rtx_cost. (reload_cse_simplify_operands): Update call of rtx_cost. (reload_cse_move2add): call of rtx_cost. * target.h (struct gcc_target): Update rtx_costs and address_costs. * rtlanal.c (rtx_cost): Add speed argument. (address_cost): Add speed argument (default_address_cost): Likewise. (insn_rtx_cost): Likewise. * cfgloopanal.c (seq_cost): Add speed argument. (target_reg_cost, target_spill_cost): Turn to array. (init_set_costs): Update for speed. (estimate_reg_pressure_cost): Add speed argument. * auto-inc-dec.c (attempt_change): Update call of rtx_cost. * dojump.c (prefer_and_bit_test): UPdate call of rtx_cost. * tree-ssa-loop-ivopts.c (struct ivopts_data): New field speed. (seq_cost): Add speed argument. (computation_cost): Add speed arugment. (add_cost, multiply_by_const, get_address_cost): add speed argument. (force_expr_to_var_cost): Update for profile info. (force_var_cost): Likewise. (split_address_cost): Likewise. (ptr_difference_cost): Likewise. (difference_cost): Likewise. (get_computation_cost_at): Likewise. (determine_iv_cost): Likewise. (ivopts_global_cost_for_size): Likewise. (rewrite_use_address): Likewise. (tree_ssa_iv_optimize_loop): Initialize speed field. * cse.c (optimize_this_for_speed_p): New static var. (notreg_cost): Update call of rtx_cost. (cse_extended_basic_block): set optimize_this_for_speed_p. * ifcvt.c (cheap_bb_rtx_cost_p): Update call of rtx_cost. (noce_try_cmove_arith): Likewise. (noce_try_sign_mask): LIkewise. * expr.c (compress_float_constant): Update rtx_cost calls. * tree-ssa-address.c (most_expensive_mult_to_index): Add speed argument. (addr_to_parts): Likewise. (create_mem_ref): Likewise. * dse.c (find_shift_sequence): Add speed argument. (replace_read): Update call. * calls.c (precompute_register_parameters): Update call of rtx_cost. * expmed.c (sdiv_pow2_cheap, smod_pow2_cheap, zero_cost, add_cost, * neg_cost, shift_cost, shiftadd_cost, shiftsub_cost, mul_cost, sdiv_cost, udiv_cost ,mul_widen_cost, mul_highpart_cost): Increase dimension. (init_expmed): Initialize for both size and speed. (expand_shift): Use profile. (synth_mult): Use profile. (choose_mult_variant): Use profile. (expand_mult): Use profile. (expand_mult_highpart_optab): Use profile. (expand_mult_highpart): Use profile. (expand_smod_pow2): Use profile. (expand_divmod): Use profile. * simplify-rtx.c (simplify_binary_operation_1): Update call of rtx_cost. * loop-invariant.c (create_new_invariant): Use profile. (gain_for_invariant): Add speed parameter. (best_gain_for_invariant): Likewise. (find_invariants_to_move): Likewise. (move_single_loop_invariants): Set it. * target-def.h (TARGET_RTX_COSTS): Use hook. * rtl.h (rtx_cost, address_cost, insn_rtx_cost): Update prototpe. (optimize_insn_for_size_p, optimize_insn_for_speed_p): Declare. * output.h (default_address_cost): Update prototype. * combine.c (optimize_this_for_speed_p): New static var. (combine_validate_cost): Update call of rtx_cost. (combine_instructions): Set optimize_this_for_speed_p. (expand_compound_operation): Update call of rtx_cost. (make_extraction):Update call of rtx_cost. (force_to_mode):Update call of rtx_cost. (distribute_and_simplify_rtx):Update call of rtx_cost. * cfgloop.h (target_reg_cost, target_spill_cost): Turn to array. (estimate_reg_pressure_cost): Update prototype. * tree-flow.h (multiply_by_cost, create_mem_ref): Update prototype. * basic-block.h (optimize_insn_for_size_p, optimize_insn_for_speed_p): Remove. * config/alpha/alpha.c (alpha_rtx_costs): Update. (alpha_rtx_costs): Update. * config/frv/frv.c (frv_rtx_costs): Update. * config/s390/s390.c (s390_rtx_costs): Update. * config/m32c/m32c.c (m32c_memory_move_cost): Update. (m32c_rtx_costs): Update. * config/spu/spu.c (TARGET_ADDRESS_COST): Upate. (spu_rtx_costs): Update. * config/sparc/sparc.c (sparc_rtx_costs): Update. * config/m32r/m32r.c (m32r_rtx_costs): Update. * config/i386/i386.c (:ix86_address_cost): Update. (ix86_rtx_costs): Update. * config/sh/sh.c (sh_rtx_costs, sh_address_cost): Update. * config/pdp11/pdp11.c (pdp11_rtx_costs): Update. * config/avr/avr.c (avr_rtx_costs, avr_address_cost): Update. * config/crx/crx.c (crx_address_cost): Update. * config/xtensa/xtensa.c (xtensa_rtx_costs): Update. * config/stormy16/stormy16.c (xstormy16_address_cost, xstormy16_rtx_costs): Update. * config/m68hc11/m68hc11.c (m68hc11_address_cost, m68hc11_rtx_costs): Update. * config/cris/cris.c (cris_rtx_costs, cris_address_cost): Update. * config/iq2000/iq2000.c (iq2000_rtx_costs, iq2000_address_cost): Update. * config/mn10300/mn10300.c (mn10300_address_cost, mn10300_rtx_costs): Update * config/ia64/ia64.c (ia64_rtx_costs): Update. * config/m68k/m68k.c (m68k_rtx_costs): Update. * config/rs6000/rs6000.c (rs6000_rtx_costs): Update. * config/arc/arc.c (arc_rtx_costs, arc_address_cost): Update. * config/mcore/mcore.c (TARGET_ADDRESS_COST): Update. (mcore_rtx_costs): update. * config/score/score3.c (score3_rtx_costs): Update. * config/score/score7.c (score7_rtx_costs): Update. * config/score/score3.h (score3_rtx_costs):Update. * config/score/score7.h (score7_rtx_costs): Update. * config/score/score.c (score_rtx_costs): Update. * config/arm/arm.c (arm_address_cost): Update. (arm_rtx_costs_1): Update. (arm_rtx_costs_1): Update. (arm_size_rtx_costs): Update. (arm_size_rtx_costs): Update. (arm_size_rtx_costs): Update. (arm_xscale_rtx_costs): Update. (arm_thumb_address_cost): Update. * config/pa/pa.c (hppa_address_cost): Update. * config/mips/mips.c (mips_rtx_costs): Update. * config/vax/vax.c (vax_address_cost): Update. * config/h8300/h8300.c (h8300_shift_costs): Update. (h8300_rtx_costs): Update. * config/v850/v850.c (TARGET_ADDRESS_COST): Update. (v850_rtx_costs): Update. * config/mmix/mmix.c (mmix_rtx_costs, mmix_rtx_costs): Update. * config/bfin/bfin.c (bfin_address_cost): Update. (bfin_rtx_costs): Update. * stmt.c (lshift_cheap_p): Update. From-SVN: r139821
Diffstat (limited to 'gcc/expmed.c')
-rw-r--r--gcc/expmed.c259
1 files changed, 141 insertions, 118 deletions
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 83a8760..d4306f4 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -64,8 +64,8 @@ static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
Usually, this will mean that the MD file will emit non-branch
sequences. */
-static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
-static bool smod_pow2_cheap[NUM_MACHINE_MODES];
+static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
+static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
@@ -98,17 +98,17 @@ static bool smod_pow2_cheap[NUM_MACHINE_MODES];
/* Cost of various pieces of RTL. Note that some of these are indexed by
shift count and some by mode. */
-static int zero_cost;
-static int add_cost[NUM_MACHINE_MODES];
-static int neg_cost[NUM_MACHINE_MODES];
-static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
-static int mul_cost[NUM_MACHINE_MODES];
-static int sdiv_cost[NUM_MACHINE_MODES];
-static int udiv_cost[NUM_MACHINE_MODES];
-static int mul_widen_cost[NUM_MACHINE_MODES];
-static int mul_highpart_cost[NUM_MACHINE_MODES];
+static int zero_cost[2];
+static int add_cost[2][NUM_MACHINE_MODES];
+static int neg_cost[2][NUM_MACHINE_MODES];
+static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int mul_cost[2][NUM_MACHINE_MODES];
+static int sdiv_cost[2][NUM_MACHINE_MODES];
+static int udiv_cost[2][NUM_MACHINE_MODES];
+static int mul_widen_cost[2][NUM_MACHINE_MODES];
+static int mul_highpart_cost[2][NUM_MACHINE_MODES];
void
init_expmed (void)
@@ -137,15 +137,14 @@ init_expmed (void)
rtx cint[MAX_BITS_PER_WORD];
int m, n;
enum machine_mode mode, wider_mode;
+ int speed;
- zero_cost = rtx_cost (const0_rtx, 0);
for (m = 1; m < MAX_BITS_PER_WORD; m++)
{
pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
cint[m] = GEN_INT (m);
}
-
memset (&all, 0, sizeof all);
PUT_CODE (&all.reg, REG);
@@ -206,61 +205,71 @@ init_expmed (void)
XEXP (&all.shift_sub, 0) = &all.shift_mult;
XEXP (&all.shift_sub, 1) = &all.reg;
- for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
- mode != VOIDmode;
- mode = GET_MODE_WIDER_MODE (mode))
+ for (speed = 0; speed < 2; speed++)
{
- PUT_MODE (&all.reg, mode);
- PUT_MODE (&all.plus, mode);
- PUT_MODE (&all.neg, mode);
- PUT_MODE (&all.mult, mode);
- PUT_MODE (&all.sdiv, mode);
- PUT_MODE (&all.udiv, mode);
- PUT_MODE (&all.sdiv_32, mode);
- PUT_MODE (&all.smod_32, mode);
- PUT_MODE (&all.wide_trunc, mode);
- PUT_MODE (&all.shift, mode);
- PUT_MODE (&all.shift_mult, mode);
- PUT_MODE (&all.shift_add, mode);
- PUT_MODE (&all.shift_sub, mode);
-
- add_cost[mode] = rtx_cost (&all.plus, SET);
- neg_cost[mode] = rtx_cost (&all.neg, SET);
- mul_cost[mode] = rtx_cost (&all.mult, SET);
- sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
- udiv_cost[mode] = rtx_cost (&all.udiv, SET);
-
- sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
- <= 2 * add_cost[mode]);
- smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
- <= 4 * add_cost[mode]);
-
- wider_mode = GET_MODE_WIDER_MODE (mode);
- if (wider_mode != VOIDmode)
- {
- PUT_MODE (&all.zext, wider_mode);
- PUT_MODE (&all.wide_mult, wider_mode);
- PUT_MODE (&all.wide_lshr, wider_mode);
- XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
+ crtl->maybe_hot_insn_p = speed;
+ zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
- mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
- mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
- }
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ PUT_MODE (&all.reg, mode);
+ PUT_MODE (&all.plus, mode);
+ PUT_MODE (&all.neg, mode);
+ PUT_MODE (&all.mult, mode);
+ PUT_MODE (&all.sdiv, mode);
+ PUT_MODE (&all.udiv, mode);
+ PUT_MODE (&all.sdiv_32, mode);
+ PUT_MODE (&all.smod_32, mode);
+ PUT_MODE (&all.wide_trunc, mode);
+ PUT_MODE (&all.shift, mode);
+ PUT_MODE (&all.shift_mult, mode);
+ PUT_MODE (&all.shift_add, mode);
+ PUT_MODE (&all.shift_sub, mode);
+
+ add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
+ neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
+ mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
+ sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
+ udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
+
+ sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
+ <= 2 * add_cost[speed][mode]);
+ smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
+ <= 4 * add_cost[speed][mode]);
+
+ wider_mode = GET_MODE_WIDER_MODE (mode);
+ if (wider_mode != VOIDmode)
+ {
+ PUT_MODE (&all.zext, wider_mode);
+ PUT_MODE (&all.wide_mult, wider_mode);
+ PUT_MODE (&all.wide_lshr, wider_mode);
+ XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
+
+ mul_widen_cost[speed][wider_mode]
+ = rtx_cost (&all.wide_mult, SET, speed);
+ mul_highpart_cost[speed][mode]
+ = rtx_cost (&all.wide_trunc, SET, speed);
+ }
- shift_cost[mode][0] = 0;
- shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
+ shift_cost[speed][mode][0] = 0;
+ shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
+ = add_cost[speed][mode];
- n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
- for (m = 1; m < n; m++)
- {
- XEXP (&all.shift, 1) = cint[m];
- XEXP (&all.shift_mult, 1) = pow2[m];
+ n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
+ for (m = 1; m < n; m++)
+ {
+ XEXP (&all.shift, 1) = cint[m];
+ XEXP (&all.shift_mult, 1) = pow2[m];
- shift_cost[mode][m] = rtx_cost (&all.shift, SET);
- shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
- shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
+ shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
+ shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
+ shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
+ }
}
}
+ default_rtl_profile ();
}
/* Return an rtx representing minus the value of X.
@@ -2057,6 +2066,7 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
optab rrotate_optab = rotr_optab;
enum machine_mode op1_mode;
int attempt;
+ bool speed = optimize_insn_for_speed_p ();
op1 = expand_normal (amount);
op1_mode = GET_MODE (op1);
@@ -2098,8 +2108,8 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
&& INTVAL (op1) > 0
&& INTVAL (op1) < GET_MODE_BITSIZE (mode)
&& INTVAL (op1) < MAX_BITS_PER_WORD
- && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
- && shift_cost[mode][INTVAL (op1)] != MAX_COST)
+ && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
+ && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
{
int i;
for (i = 0; i < INTVAL (op1); i++)
@@ -2293,6 +2303,9 @@ struct alg_hash_entry {
Otherwise, the cost within which multiplication by T is
impossible. */
struct mult_cost cost;
+
+ /* OPtimized for speed? */
+ bool speed;
};
/* The number of cache/hash entries. */
@@ -2346,6 +2359,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
int hash_index;
bool cache_hit = false;
enum alg_code cache_alg = alg_zero;
+ bool speed = optimize_insn_for_speed_p ();
/* Indicate that no algorithm is yet found. If no algorithm
is found, this value will be returned and indicate failure. */
@@ -2373,13 +2387,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
fail now. */
if (t == 0)
{
- if (MULT_COST_LESS (cost_limit, zero_cost))
+ if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
return;
else
{
alg_out->ops = 1;
- alg_out->cost.cost = zero_cost;
- alg_out->cost.latency = zero_cost;
+ alg_out->cost.cost = zero_cost[speed];
+ alg_out->cost.latency = zero_cost[speed];
alg_out->op[0] = alg_zero;
return;
}
@@ -2392,11 +2406,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
best_cost = *cost_limit;
/* Compute the hash index. */
- hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
+ hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
/* See if we already know what to do for T. */
if (alg_hash[hash_index].t == t
&& alg_hash[hash_index].mode == mode
+ && alg_hash[hash_index].mode == mode
+ && alg_hash[hash_index].speed == speed
&& alg_hash[hash_index].alg != alg_unknown)
{
cache_alg = alg_hash[hash_index].alg;
@@ -2465,10 +2481,10 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
q = t >> m;
/* The function expand_shift will choose between a shift and
a sequence of additions, so the observed cost is given as
- MIN (m * add_cost[mode], shift_cost[mode][m]). */
- op_cost = m * add_cost[mode];
- if (shift_cost[mode][m] < op_cost)
- op_cost = shift_cost[mode][m];
+ MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
+ op_cost = m * add_cost[speed][mode];
+ if (shift_cost[speed][mode][m] < op_cost)
+ op_cost = shift_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, q, &new_limit, mode);
@@ -2509,7 +2525,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...111. Multiply by (T + 1) and subtract 1. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t + 1, &new_limit, mode);
@@ -2529,7 +2545,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
/* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, t - 1, &new_limit, mode);
@@ -2574,14 +2590,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to its cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[mode] + shift_cost[mode][m];
- if (shiftadd_cost[mode][m] < op_cost)
+ op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
+ if (shiftadd_cost[speed][mode][m] < op_cost)
{
- op_cost = shiftadd_cost[mode][m];
+ op_cost = shiftadd_cost[speed][mode][m];
op_latency = op_cost;
}
else
- op_latency = add_cost[mode];
+ op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@@ -2613,14 +2629,14 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
equal to it's cost, otherwise assume that on superscalar
hardware the shift may be executed concurrently with the
earlier steps in the algorithm. */
- op_cost = add_cost[mode] + shift_cost[mode][m];
- if (shiftsub_cost[mode][m] < op_cost)
+ op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
+ if (shiftsub_cost[speed][mode][m] < op_cost)
{
- op_cost = shiftsub_cost[mode][m];
+ op_cost = shiftsub_cost[speed][mode][m];
op_latency = op_cost;
}
else
- op_latency = add_cost[mode];
+ op_latency = add_cost[speed][mode];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_latency;
@@ -2654,7 +2670,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftadd_cost[mode][m];
+ op_cost = shiftadd_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
@@ -2679,7 +2695,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
m = exact_log2 (q);
if (m >= 0 && m < maxm)
{
- op_cost = shiftsub_cost[mode][m];
+ op_cost = shiftsub_cost[speed][mode][m];
new_limit.cost = best_cost.cost - op_cost;
new_limit.latency = best_cost.latency - op_cost;
synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
@@ -2710,6 +2726,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
caller. */
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
+ alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = alg_impossible;
alg_hash[hash_index].cost = *cost_limit;
return;
@@ -2720,6 +2737,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
{
alg_hash[hash_index].t = t;
alg_hash[hash_index].mode = mode;
+ alg_hash[hash_index].speed = speed;
alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
alg_hash[hash_index].cost.cost = best_cost.cost;
alg_hash[hash_index].cost.latency = best_cost.latency;
@@ -2759,6 +2777,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
struct algorithm alg2;
struct mult_cost limit;
int op_cost;
+ bool speed = optimize_insn_for_speed_p ();
/* Fail quickly for impossible bounds. */
if (mult_cost < 0)
@@ -2767,7 +2786,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
/* Ensure that mult_cost provides a reasonable upper bound.
Any constant multiplication can be performed with less
than 2 * bits additions. */
- op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
+ op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
if (mult_cost > op_cost)
mult_cost = op_cost;
@@ -2780,7 +2799,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
`unsigned int' */
if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
{
- op_cost = neg_cost[mode];
+ op_cost = neg_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@@ -2800,7 +2819,7 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
}
/* This proves very useful for division-by-constant. */
- op_cost = add_cost[mode];
+ op_cost = add_cost[speed][mode];
if (MULT_COST_LESS (&alg->cost, mult_cost))
{
limit.cost = alg->cost.cost - op_cost;
@@ -2988,6 +3007,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
enum mult_variant variant;
struct algorithm algorithm;
int max_cost;
+ bool speed = optimize_insn_for_speed_p ();
/* Handling const0_rtx here allows us to use zero as a rogue value for
coeff below. */
@@ -3029,8 +3049,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
result is interpreted as an unsigned coefficient.
Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
- - neg_cost[mode];
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
+ - neg_cost[speed][mode];
if (max_cost > 0
&& choose_mult_variant (mode, -INTVAL (op1), &algorithm,
&variant, max_cost))
@@ -3074,7 +3094,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
/* Exclude cost of op0 from max_cost to match the cost
calculation of the synth_mult. */
- max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
+ max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
if (choose_mult_variant (mode, coeff, &algorithm, &variant,
max_cost))
return expand_mult_const (mode, op0, coeff, target,
@@ -3317,6 +3337,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
optab moptab;
rtx tem;
int size;
+ bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
@@ -3325,7 +3346,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */
- if (mul_highpart_cost[mode] < max_cost)
+ if (mul_highpart_cost[speed][mode] < max_cost)
{
moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@@ -3337,8 +3358,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */
if (size - 1 < BITS_PER_WORD
- && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
- + 4 * add_cost[mode] < max_cost))
+ && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
+ + 4 * add_cost[speed][mode] < max_cost))
{
moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
tem = expand_binop (mode, moptab, op0, narrow_op1, target,
@@ -3352,7 +3373,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
- && mul_widen_cost[wider_mode] < max_cost)
+ && mul_widen_cost[speed][wider_mode] < max_cost)
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
unsignedp, OPTAB_WIDEN);
@@ -3363,7 +3384,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
/* Try widening the mode and perform a non-widening multiplication. */
if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
+ && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
{
rtx insns, wop0, wop1;
@@ -3390,8 +3411,8 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
&& size - 1 < BITS_PER_WORD
- && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
- + 4 * add_cost[mode] < max_cost))
+ && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
+ + 4 * add_cost[speed][mode] < max_cost))
{
tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN);
@@ -3429,6 +3450,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
enum mult_variant variant;
struct algorithm alg;
rtx tem;
+ bool speed = optimize_insn_for_speed_p ();
gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
/* We can't support modes wider than HOST_BITS_PER_INT. */
@@ -3444,13 +3466,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return expand_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
- extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
+ extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
/* Check whether we try to multiply by a negative constant. */
if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
{
sign_adjust = true;
- extra_cost += add_cost[mode];
+ extra_cost += add_cost[speed][mode];
}
/* See whether shift/add multiplication is cheap enough. */
@@ -3510,7 +3532,7 @@ expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
temp = gen_rtx_LSHIFTRT (mode, result, shift);
if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
- || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
+ || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
{
temp = expand_binop (mode, xor_optab, op0, signmask,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
@@ -3641,7 +3663,7 @@ expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
temp = gen_reg_rtx (mode);
temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
- if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
+ if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
else
@@ -3714,6 +3736,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
int max_cost, extra_cost;
static HOST_WIDE_INT last_div_const = 0;
static HOST_WIDE_INT ext_op1;
+ bool speed = optimize_insn_for_speed_p ();
op1_is_constant = GET_CODE (op1) == CONST_INT;
if (op1_is_constant)
@@ -3844,10 +3867,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* Only deduct something for a REM if the last divide done was
for a different constant. Then set the constant of the last
divide. */
- max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
+ max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
if (rem_flag && ! (last_div_const != 0 && op1_is_constant
&& INTVAL (op1) == last_div_const))
- max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
+ max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
@@ -3966,9 +3989,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
goto fail1;
extra_cost
- = (shift_cost[compute_mode][post_shift - 1]
- + shift_cost[compute_mode][1]
- + 2 * add_cost[compute_mode]);
+ = (shift_cost[speed][compute_mode][post_shift - 1]
+ + shift_cost[speed][compute_mode][1]
+ + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@@ -4002,8 +4025,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
build_int_cst (NULL_TREE, pre_shift),
NULL_RTX, 1);
extra_cost
- = (shift_cost[compute_mode][pre_shift]
- + shift_cost[compute_mode][post_shift]);
+ = (shift_cost[speed][compute_mode][pre_shift]
+ + shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1,
max_cost - extra_cost);
@@ -4133,9 +4156,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
|| size - 1 >= BITS_PER_WORD)
goto fail1;
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
@@ -4170,9 +4193,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
mlr = gen_int_mode (ml, compute_mode);
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + 2 * add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
@@ -4265,9 +4288,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN);
- extra_cost = (shift_cost[compute_mode][post_shift]
- + shift_cost[compute_mode][size - 1]
- + 2 * add_cost[compute_mode]);
+ extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ + shift_cost[speed][compute_mode][size - 1]
+ + 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
max_cost - extra_cost);