author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2013-10-03 13:58:42 +0000 |
---|---|---|
committer | Kyrylo Tkachov <ktkachov@gcc.gnu.org> | 2013-10-03 13:58:42 +0000 |
commit | 5bea0c6c8c3deb0b52aec49434484b35e75293f2 (patch) | |
tree | d5f7a0a26c792204a9e44ba13b0f5b1f3f440052 | |
parent | 24c569251d2a19a0a75af13d7fc85802ad574801 (diff) | |
download | gcc-5bea0c6c8c3deb0b52aec49434484b35e75293f2.zip gcc-5bea0c6c8c3deb0b52aec49434484b35e75293f2.tar.gz gcc-5bea0c6c8c3deb0b52aec49434484b35e75293f2.tar.bz2 |
aarch-common-protos.h (struct alu_cost_table): New.
[gcc/]
2013-10-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Richard Earnshaw <richard.earnshaw@arm.com>
* config/arm/aarch-common-protos.h (struct alu_cost_table): New.
(struct mult_cost_table): Likewise.
(struct mem_cost_table): Likewise.
(struct fp_cost_table): Likewise.
(struct vector_cost_table): Likewise.
(cpu_cost_table): Likewise.
* config/arm/arm.opt (mold-rtx-costs): New option.
(mnew-generic-costs): Likewise.
* config/arm/arm.c (generic_extra_costs): New table.
(cortexa15_extra_costs): Likewise.
(arm_slowmul_tune): Use NULL as new costs.
(arm_fastmul_tune): Likewise.
(arm_strongarm_tune): Likewise.
(arm_xscale_tune): Likewise.
(arm_9e_tune): Likewise.
(arm_v6t2_tune): Likewise.
(arm_cortex_a5_tune): Likewise.
(arm_cortex_a9_tune): Likewise.
(arm_v6m_tune): Likewise.
(arm_fa726te_tune): Likewise.
(arm_cortex_a15_tune): Use cortexa15_extra_costs.
(arm_cortex_tune): Use generic_extra_costs.
(shifter_op_p): New function.
(arm_unspec_cost): Likewise.
(LIBCALL_COST): Define.
(arm_new_rtx_costs): New function.
(arm_rtx_costs): Use arm_new_rtx_costs when a core-specific
table is available. Use the old costs otherwise unless mnew-generic-costs
is specified (see the sketch below).
* config/arm/arm-protos.h (tune_params): Add insn_extra_cost field.
(cpu_cost_table): Declare.
Co-Authored-By: Richard Earnshaw <rearnsha@arm.com>
From-SVN: r203160
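The path selection described in the entry above can be summarised as follows. This is a hypothetical, stand-alone sketch rather than the committed arm.c code: select_cost_path, cost_path and the three boolean parameters are illustrative stand-ins for TARGET_OLD_RTX_COSTS, TARGET_NEW_GENERIC_COSTS and the tune_params insn_extra_cost pointer.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the three costing paths arm_rtx_costs can
   take after this patch.  */
enum cost_path { OLD_COSTS, NEW_CORE_COSTS, NEW_GENERIC_COSTS };

static enum cost_path
select_cost_path (bool old_rtx_costs,        /* -mold-rtx-costs given.  */
                  bool new_generic_costs,    /* -mnew-generic-costs given.  */
                  bool core_table_available) /* tune_params has insn_extra_cost.  */
{
  /* The old tables win when explicitly requested, or when the core has no
     extra-cost table and the generic one was not asked for.  */
  if (old_rtx_costs || (!core_table_available && !new_generic_costs))
    return OLD_COSTS;
  /* Otherwise arm_new_rtx_costs is used, with the core table if present,
     else with generic_extra_costs.  */
  return core_table_available ? NEW_CORE_COSTS : NEW_GENERIC_COSTS;
}

int
main (void)
{
  printf ("%d %d %d\n",
          select_cost_path (false, false, true),   /* 1: new, core table.  */
          select_cost_path (false, false, false),  /* 0: old tables.  */
          select_cost_path (false, true, false));  /* 2: new, generic table.  */
  return 0;
}
```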
-rw-r--r-- | gcc/ChangeLog | 35
-rw-r--r-- | gcc/config/arm/aarch-common-protos.h | 101
-rw-r--r-- | gcc/config/arm/arm-protos.h | 3
-rw-r--r-- | gcc/config/arm/arm.c | 1912
-rw-r--r-- | gcc/config/arm/arm.opt | 8
5 files changed, 2052 insertions, 7 deletions
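The mem_cost_table comment and the PARALLEL case in the arm.c hunk below describe how LDM/STM costs are built from ldm_1st, ldm_regs_per_insn_1st and ldm_regs_per_insn_subsequent. Here is a small stand-alone sketch of that documented formula using the Cortex-A15 numbers from the new table; ldm_extra_cost is an illustrative helper (not a GCC function) and the (N) * 4 expansion of COSTS_N_INSNS is an assumption borrowed from rtl.h.

```c
#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)   /* Assumed to match GCC's rtl.h definition.  */
#define MAX(A, B) ((A) > (B) ? (A) : (B))

/* Extra cost of an LDM of NREGS registers, per the formula documented in
   the new mem_cost_table comment:
     ldm_1st + COSTS_N_INSNS ((MAX (nregs - regs_per_insn_1st, 0)
                               + regs_per_insn_sub - 1) / regs_per_insn_sub).  */
static int
ldm_extra_cost (int nregs, int ldm_1st, int regs_per_insn_1st,
                int regs_per_insn_sub)
{
  return ldm_1st + COSTS_N_INSNS ((MAX (nregs - regs_per_insn_1st, 0)
                                   + regs_per_insn_sub - 1) / regs_per_insn_sub);
}

int
main (void)
{
  /* Cortex-A15: ldm_1st = COSTS_N_INSNS (5), 1 register loaded by the first
     insn, then 2 registers per additional insn.  */
  for (int nregs = 1; nregs <= 8; nregs++)
    printf ("LDM of %d regs: extra cost %d\n",
            nregs, ldm_extra_cost (nregs, COSTS_N_INSNS (5), 1, 2));
  return 0;
}
```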
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fd3903b..915d769 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2013-10-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com> + Richard Earnshaw <richard.earnshaw@arm.com> + + * config/arm/aarch-common-protos.h (struct alu_cost_table): New. + (struct mult_cost_table): Likewise. + (struct mem_cost_table): Likewise. + (struct fp_cost_table): Likewise. + (struct vector_cost_table): Likewise. + (cpu_cost_table): Likewise. + * config/arm/arm.opt (mold-rts-costs): New option. + (mnew-generic-costs): Likewise. + * config/arm/arm.c (generic_extra_costs): New table. + (cortexa15_extra_costs): Likewise. + (arm_slowmul_tune): Use NULL as new costs. + (arm_fastmul_tune): Likewise. + (arm_strongarm_tune): Likewise. + (arm_xscale_tune): Likewise. + (arm_9e_tune): Likewise. + (arm_v6t2_tune): Likewise. + (arm_cortex_a5_tune): Likewise. + (arm_cortex_a9_tune): Likewise. + (arm_v6m_tune): Likewise. + (arm_fa726te_tune): Likewise. + (arm_cortex_a15_tune): Use cortex15_extra_costs. + (arm_cortex_tune): Use generict_extra_costs. + (shifter_op_p): New function. + (arm_unspec_cost): Likewise. + (LIBCALL_COST): Define. + (arm_new_rtx_costs): New function. + (arm_rtx_costs): Use arm_new_rtx_costs when core-specific + table is available. Use old costs otherwise unless mnew-generic-costs + is specified. + * config/arm/arm-protos.h (tune_params): Add insn_extra_cost field. + (cpu_cost_table): Declare. + 2013-10-03 Marcus Shawcroft <marcus.shawcroft@arm.com> PR target/58460 diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 97768fc..841f544 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -1,5 +1,4 @@ -/* Function prototypes for instruction scheduling dependeoncy routines, - defined in aarch-common.c +/* Functions and structures shared between arm and aarch64. Copyright (C) 1991-2013 Free Software Foundation, Inc. Contributed by ARM Ltd. @@ -33,4 +32,102 @@ extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx); extern int arm_no_early_store_addr_dep (rtx, rtx); +/* RTX cost table definitions. These are used when tuning for speed rather + than for size and should reflect the _additional_ cost over the cost + of the fastest instruction in the machine, which is COSTS_N_INSNS (1). + Therefore it's okay for some costs to be 0. + Costs may not have a negative value. */ +struct alu_cost_table +{ + const int arith; /* ADD/SUB. */ + const int logical; /* AND/ORR/EOR/BIC, etc. */ + const int shift; /* Simple shift. */ + const int shift_reg; /* Simple shift by reg. */ + const int arith_shift; /* Additional when arith also shifts... */ + const int arith_shift_reg; /* ... and when the shift is by a reg. */ + const int log_shift; /* Additional when logic also shifts... */ + const int log_shift_reg; /* ... and when the shift is by a reg. */ + const int extnd; /* Zero/sign extension. */ + const int extnd_arith; /* Extend and arith. */ + const int bfi; /* Bit-field insert. */ + const int bfx; /* Bit-field extraction. */ + const int clz; /* Count Leading Zeros. */ + const int non_exec; /* Extra cost when not executing insn. */ + const bool non_exec_costs_exec; /* True if non-execution must add the exec + cost. */ +}; + +struct mult_cost_table +{ + const int simple; + const int flag_setting; /* Additional cost if multiply sets flags. 
*/ + const int extend; + const int add; + const int extend_add; + const int idiv; +}; + +/* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. + Similarly for STM operations. + Therefore the ldm_regs_per_insn_1st/stm_regs_per_insn_1st and + ldm_regs_per_insn_subsequent/stm_regs_per_insn_subsequent fields indicate + the number of registers loaded/stored and are expressed by a simple integer + and not by a COSTS_N_INSNS (N) expression. + */ +struct mem_cost_table +{ + const int load; + const int load_sign_extend; /* Additional to load cost. */ + const int ldrd; /* Cost of LDRD. */ + const int ldm_1st; + const int ldm_regs_per_insn_1st; + const int ldm_regs_per_insn_subsequent; + const int loadf; /* SFmode. */ + const int loadd; /* DFmode. */ + const int load_unaligned; /* Extra for unaligned loads. */ + const int store; + const int strd; + const int stm_1st; + const int stm_regs_per_insn_1st; + const int stm_regs_per_insn_subsequent; + const int storef; /* SFmode. */ + const int stored; /* DFmode. */ + const int store_unaligned; /* Extra for unaligned stores. */ +}; + +struct fp_cost_table +{ + const int div; + const int mult; + const int mult_addsub; /* Non-fused. */ + const int fma; /* Fused. */ + const int addsub; + const int fpconst; /* Immediate. */ + const int neg; /* NEG and ABS. */ + const int compare; + const int widen; /* Widen to this size. */ + const int narrow; /* Narrow from this size. */ + const int toint; + const int fromint; + const int roundint; /* V8 round to integral, remains FP format. */ +}; + +struct vector_cost_table +{ + const int alu; +}; + +struct cpu_cost_table +{ + const struct alu_cost_table alu; + const struct mult_cost_table mult[2]; /* SImode and DImode. */ + const struct mem_cost_table ldst; + const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ + const struct vector_cost_table vect; +}; + + #endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index f694dfd..944cf10 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -246,9 +246,12 @@ struct cpu_vec_costs { #ifdef RTX_CODE /* This needs to be here because we need RTX_CODE and similar. */ +struct cpu_cost_table; + struct tune_params { bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + const struct cpu_cost_table *insn_extra_cost; bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); int constant_limit; /* Maximum number of instructions to conditionalise. */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f0a7dea..42f3f47 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -945,10 +945,212 @@ struct cpu_vec_costs arm_default_vec_cost = { 1, /* cond_not_taken_branch_cost. */ }; + +const struct cpu_cost_table generic_extra_costs = +{ + /* ALU */ + { + 0, /* Arith. */ + 0, /* Logical. */ + 0, /* Shift. */ + COSTS_N_INSNS (1), /* Shift_reg. */ + 0, /* Arith_shift. */ + COSTS_N_INSNS (1), /* Arith_shift_reg. */ + 0, /* Log_shift. */ + COSTS_N_INSNS (1), /* Log_shift_reg. */ + 0, /* Extend. */ + COSTS_N_INSNS (1), /* Extend_arith. */ + 0, /* Bfi. */ + 0, /* Bfx. */ + 0, /* Clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (2), /* Simple. */ + COSTS_N_INSNS (1), /* Flag_setting. */ + COSTS_N_INSNS (2), /* Extend. 
*/ + COSTS_N_INSNS (3), /* Add. */ + COSTS_N_INSNS (3), /* Extend_add. */ + COSTS_N_INSNS (8) /* Idiv. */ + }, + /* MULT DImode */ + { + 0, /* Simple (N/A). */ + 0, /* Flag_setting (N/A). */ + COSTS_N_INSNS (2), /* Extend. */ + 0, /* Add (N/A). */ + COSTS_N_INSNS (3), /* Extend_add. */ + 0 /* Idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* Load. */ + COSTS_N_INSNS (2), /* Load_sign_extend. */ + COSTS_N_INSNS (3), /* Ldrd. */ + COSTS_N_INSNS (2), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 1, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* Loadf. */ + COSTS_N_INSNS (3), /* Loadd. */ + COSTS_N_INSNS (1), /* Load_unaligned. */ + COSTS_N_INSNS (2), /* Store. */ + COSTS_N_INSNS (3), /* Strd. */ + COSTS_N_INSNS (2), /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 1, /* Stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* Storef. */ + COSTS_N_INSNS (3), /* Stored. */ + COSTS_N_INSNS (1) /* Store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* Div. */ + COSTS_N_INSNS (2), /* Mult. */ + COSTS_N_INSNS (3), /* Mult_addsub. */ + COSTS_N_INSNS (3), /* Fma. */ + COSTS_N_INSNS (1), /* Addsub. */ + 0, /* Fpconst. */ + 0, /* Neg. */ + 0, /* Compare. */ + 0, /* Widen. */ + 0, /* Narrow. */ + 0, /* Toint. */ + 0, /* Fromint. */ + 0 /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* Div. */ + COSTS_N_INSNS (5), /* Mult. */ + COSTS_N_INSNS (7), /* Mult_addsub. */ + COSTS_N_INSNS (7), /* Fma. */ + COSTS_N_INSNS (3), /* Addsub. */ + 0, /* Fpconst. */ + 0, /* Neg. */ + 0, /* Compare. */ + 0, /* Widen. */ + 0, /* Narrow. */ + 0, /* Toint. */ + 0, /* Fromint. */ + 0 /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* Alu. */ + } +}; + +const struct cpu_cost_table cortexa15_extra_costs = +{ + /* ALU */ + { + COSTS_N_INSNS (1), /* Arith. */ + COSTS_N_INSNS (1), /* Logical. */ + COSTS_N_INSNS (1), /* Shift. */ + COSTS_N_INSNS (1), /* Shift_reg. */ + COSTS_N_INSNS (1), /* Arith_shift. */ + COSTS_N_INSNS (1), /* Arith_shift_reg. */ + COSTS_N_INSNS (1), /* Log_shift. */ + COSTS_N_INSNS (1), /* Log_shift_reg. */ + COSTS_N_INSNS (1), /* Extend. */ + COSTS_N_INSNS (2), /* Extend_arith. */ + COSTS_N_INSNS (2), /* Bfi. */ + COSTS_N_INSNS (1), /* Bfx. */ + COSTS_N_INSNS (1), /* Clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (3), /* Simple. */ + COSTS_N_INSNS (4), /* Flag_setting. */ + COSTS_N_INSNS (3), /* Extend. */ + COSTS_N_INSNS (4), /* Add. */ + COSTS_N_INSNS (4), /* Extend_add. */ + COSTS_N_INSNS (19) /* Idiv. */ + }, + /* MULT DImode */ + { + 0, /* Simple (N/A). */ + 0, /* Flag_setting (N/A). */ + COSTS_N_INSNS (4), /* Extend. */ + 0, /* Add (N/A). */ + COSTS_N_INSNS (6), /* Extend_add. */ + 0 /* Idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (4), /* Load. */ + COSTS_N_INSNS (4), /* Load_sign_extend. */ + COSTS_N_INSNS (4), /* Ldrd. */ + COSTS_N_INSNS (5), /* Ldm_1st. */ + 1, /* Ldm_regs_per_insn_1st. */ + 2, /* Ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (5), /* Loadf. */ + COSTS_N_INSNS (5), /* Loadd. */ + COSTS_N_INSNS (1), /* Load_unaligned. */ + COSTS_N_INSNS (1), /* Store. */ + COSTS_N_INSNS (1), /* Strd. */ + COSTS_N_INSNS (2), /* Stm_1st. */ + 1, /* Stm_regs_per_insn_1st. */ + 2, /* Stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* Storef. */ + COSTS_N_INSNS (1), /* Stored. */ + COSTS_N_INSNS (1) /* Store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (18), /* Div. */ + COSTS_N_INSNS (5), /* Mult. 
*/ + COSTS_N_INSNS (3), /* Mult_addsub. */ + COSTS_N_INSNS (13), /* Fma. */ + COSTS_N_INSNS (5), /* Addsub. */ + COSTS_N_INSNS (5), /* Fpconst. */ + COSTS_N_INSNS (3), /* Neg. */ + COSTS_N_INSNS (3), /* Compare. */ + COSTS_N_INSNS (3), /* Widen. */ + COSTS_N_INSNS (3), /* Narrow. */ + COSTS_N_INSNS (3), /* Toint. */ + COSTS_N_INSNS (3), /* Fromint. */ + COSTS_N_INSNS (3) /* Roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (32), /* Div. */ + COSTS_N_INSNS (5), /* Mult. */ + COSTS_N_INSNS (3), /* Mult_addsub. */ + COSTS_N_INSNS (13), /* Fma. */ + COSTS_N_INSNS (5), /* Addsub. */ + COSTS_N_INSNS (3), /* Fpconst. */ + COSTS_N_INSNS (3), /* Neg. */ + COSTS_N_INSNS (3), /* Compare. */ + COSTS_N_INSNS (3), /* Widen. */ + COSTS_N_INSNS (3), /* Narrow. */ + COSTS_N_INSNS (3), /* Toint. */ + COSTS_N_INSNS (3), /* Fromint. */ + COSTS_N_INSNS (3) /* Roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* Alu. */ + } +}; + const struct tune_params arm_slowmul_tune = { arm_slowmul_rtx_costs, NULL, + NULL, 3, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -964,6 +1166,7 @@ const struct tune_params arm_fastmul_tune = { arm_fastmul_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -982,6 +1185,7 @@ const struct tune_params arm_strongarm_tune = { arm_fastmul_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 3, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -996,6 +1200,7 @@ const struct tune_params arm_strongarm_tune = const struct tune_params arm_xscale_tune = { arm_xscale_rtx_costs, + NULL, xscale_sched_adjust_cost, 2, /* Constant limit. */ 3, /* Max cond insns. */ @@ -1012,6 +1217,7 @@ const struct tune_params arm_9e_tune = { arm_9e_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1027,6 +1233,7 @@ const struct tune_params arm_v6t2_tune = { arm_9e_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1042,6 +1249,7 @@ const struct tune_params arm_v6t2_tune = const struct tune_params arm_cortex_tune = { arm_9e_rtx_costs, + &generic_extra_costs, NULL, 1, /* Constant limit. */ 5, /* Max cond insns. */ @@ -1057,6 +1265,7 @@ const struct tune_params arm_cortex_tune = const struct tune_params arm_cortex_a15_tune = { arm_9e_rtx_costs, + &cortexa15_extra_costs, NULL, 1, /* Constant limit. */ 2, /* Max cond insns. */ @@ -1076,6 +1285,7 @@ const struct tune_params arm_cortex_a5_tune = { arm_9e_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 1, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1090,6 +1300,7 @@ const struct tune_params arm_cortex_a5_tune = const struct tune_params arm_cortex_a9_tune = { arm_9e_rtx_costs, + NULL, cortex_a9_sched_adjust_cost, 1, /* Constant limit. */ 5, /* Max cond insns. */ @@ -1108,6 +1319,7 @@ const struct tune_params arm_v6m_tune = { arm_9e_rtx_costs, NULL, + NULL, 1, /* Constant limit. */ 5, /* Max cond insns. */ ARM_PREFETCH_NOT_BENEFICIAL, @@ -1122,6 +1334,7 @@ const struct tune_params arm_v6m_tune = const struct tune_params arm_fa726te_tune = { arm_9e_rtx_costs, + NULL, fa726te_sched_adjust_cost, 1, /* Constant limit. */ 5, /* Max cond insns. */ @@ -8291,18 +8504,1707 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, } } +/* Helper function for arm_rtx_costs. If the operand is a valid shift + operand, then return the operand that is being shifted. 
If the shift + is not by a constant, then set SHIFT_REG to point to the operand. + Return NULL if OP is not a shifter operand. */ +static rtx +shifter_op_p (rtx op, rtx *shift_reg) +{ + enum rtx_code code = GET_CODE (op); + + if (code == MULT && CONST_INT_P (XEXP (op, 1)) + && exact_log2 (INTVAL (XEXP (op, 1))) > 0) + return XEXP (op, 0); + else if (code == ROTATE && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); + else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT + || code == ASHIFTRT) + { + if (!CONST_INT_P (XEXP (op, 1))) + *shift_reg = XEXP (op, 1); + return XEXP (op, 0); + } + + return NULL; +} + +static bool +arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) +{ + const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; + gcc_assert (GET_CODE (x) == UNSPEC); + + switch (XINT (x, 1)) + { + case UNSPEC_UNALIGNED_LOAD: + /* We can only do unaligned loads into the integer unit, and we can't + use LDM or LDRD. */ + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load + + extra_cost->ldst.load_unaligned); + +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_UNALIGNED_STORE: + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store + + extra_cost->ldst.store_unaligned); + + *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p); +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_VRINTZ: + case UNSPEC_VRINTP: + case UNSPEC_VRINTM: + case UNSPEC_VRINTR: + case UNSPEC_VRINTX: + case UNSPEC_VRINTA: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint; + + return true; + default: + *cost = COSTS_N_INSNS (2); + break; + } + return false; +} + +/* Cost of a libcall. We assume one insn per argument, an amount for the + call (one insn for -Os) and then one for processing the result. */ +#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) + +/* RTX costs. Make an estimate of the cost of executing the operation + X, which is contained with an operation with code OUTER_CODE. + SPEED_P indicates whether the cost desired is the performance cost, + or the size cost. The estimate is stored in COST and the return + value is TRUE if the cost calculation is final, or FALSE if the + caller should recurse through the operands of X to add additional + costs. + + We currently make no attempt to model the size savings of Thumb-2 + 16-bit instructions. At the normal points in compilation where + this code is called we have no measure of whether the condition + flags are live or not, and thus no realistic way to determine what + the size will eventually be. 
*/ +static bool +arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + const struct cpu_cost_table *extra_cost, + int *cost, bool speed_p) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + if (speed_p) + *cost = thumb1_rtx_costs (x, code, outer_code); + else + *cost = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case SET: + *cost = 0; + if (REG_P (SET_SRC (x)) + && REG_P (SET_DEST (x))) + { + /* Assume that most copies can be done with a single insn, + unless we don't have HW FP, in which case everything + larger than word mode will require two insns. */ + *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT + && GET_MODE_SIZE (mode) > 4) + || mode == DImode) + ? 2 : 1); + /* Conditional register moves can be encoded + in 16 bits in Thumb mode. */ + if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC) + *cost >>= 1; + } + + if (CONST_INT_P (SET_SRC (x))) + { + /* Handle CONST_INT here, since the value doesn't have a mode + and we would otherwise be unable to work out the true cost. */ + *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p); + mode = GET_MODE (SET_DEST (x)); + outer_code = SET; + /* Slightly lower the cost of setting a core reg to a constant. + This helps break up chains and allows for better scheduling. */ + if (REG_P (SET_DEST (x)) + && REGNO (SET_DEST (x)) <= LR_REGNUM) + *cost -= 1; + x = SET_SRC (x); + /* Immediate moves with an immediate in the range [0, 255] can be + encoded in 16 bits in Thumb mode. */ + if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode + && INTVAL (x) >= 0 && INTVAL (x) <=255) + *cost >>= 1; + goto const_int_cost; + } + + return false; + + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *cost = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. */ + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + /* For speed optimizations, add the costs of the address and + accessing memory. */ + if (speed_p) +#ifdef NOT_YET + *cost += (extra_cost->ldst.load + + arm_address_cost (XEXP (x, 0), mode, + ADDR_SPACE_GENERIC, speed_p)); +#else + *cost += extra_cost->ldst.load; +#endif + return true; + + case PARALLEL: + { + /* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. The + formula for N regs is thus: + + ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0) + + ldm_regs_per_insn_subsequent - 1) + / ldm_regs_per_insn_subsequent). + + Additional costs may also be added for addressing. A similar + formula is used for STM. */ + + bool is_ldm = load_multiple_operation (x, SImode); + bool is_stm = store_multiple_operation (x, SImode); + + *cost = COSTS_N_INSNS (1); + + if (is_ldm || is_stm) + { + if (speed_p) + { + HOST_WIDE_INT nregs = XVECLEN (x, 0); + HOST_WIDE_INT regs_per_insn_1st = is_ldm + ? extra_cost->ldst.ldm_regs_per_insn_1st + : extra_cost->ldst.stm_regs_per_insn_1st; + HOST_WIDE_INT regs_per_insn_sub = is_ldm + ? 
extra_cost->ldst.ldm_regs_per_insn_subsequent + : extra_cost->ldst.stm_regs_per_insn_subsequent; + + *cost += regs_per_insn_1st + + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0)) + + regs_per_insn_sub - 1) + / regs_per_insn_sub); + return true; + } + + } + return false; + } + case DIV: + case UDIV: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *cost = COSTS_N_INSNS (speed_p + ? extra_cost->fp[mode != SFmode].div : 1); + else if (mode == SImode && TARGET_IDIV) + *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1); + else + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. */ + + case MOD: + case UMOD: + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. */ + + case ROTATE: + if (mode == SImode && REG_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += extra_cost->alu.shift_reg; + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && CONST_INT_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (3) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + return true; + } + else if (mode == SImode) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + if (code == ASHIFT) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by + much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + } + else if (code == LSHIFTRT || code == ASHIFTRT) + { + if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1))) + { + /* Can use SBFX/UBFX. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + } + else + { + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += 2 * extra_cost->alu.shift; + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg); + } + else + /* Slightly disparage register shifts. */ + *cost += !CONST_INT_P (XEXP (x, 1)); + } + } + else /* Rotates. 
*/ + { + *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1))); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += (2 * extra_cost->alu.shift + + extra_cost->alu.log_shift); + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg + + extra_cost->alu.log_shift_reg); + } + } + return true; + } + + *cost = LIBCALL_COST (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 1)) == MULT) + { + rtx mul_op0, mul_op1, sub_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + if (GET_CODE (XEXP (x, 0)) == MULT) + { + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + sub_op = XEXP (x, 1); + } + else + { + mul_op0 = XEXP (XEXP (x, 1), 0); + mul_op1 = XEXP (XEXP (x, 1), 1); + sub_op = XEXP (x, 0); + } + + /* The first operand of the multiply may be optionally + negated. */ + if (GET_CODE (mul_op0) == NEG) + mul_op0 = XEXP (mul_op0, 0); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (sub_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + + if (mode == SImode) + { + rtx shift_by_reg = NULL; + rtx shift_op; + rtx non_shift_op; + + *cost = COSTS_N_INSNS (1); + + shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg); + if (shift_op == NULL) + { + shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg); + non_shift_op = XEXP (x, 0); + } + else + non_shift_op = XEXP (x, 1); + + if (shift_op != NULL) + { + if (shift_by_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_by_reg, code, 0, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, code, 0, speed_p) + + rtx_cost (non_shift_op, code, 0, speed_p)); + return true; + } + + if (arm_arch_thumb2 + && GET_CODE (XEXP (x, 1)) == MULT) + { + /* MLS. */ + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 0))) + { + int insns = arm_gen_constant (MINUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 0)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + /* Slightly disparage, as we might need to widen the result. 
*/ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + if (CONST_INT_P (XEXP (x, 0))) + { + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + { + rtx op1 = XEXP (x, 1); + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + + if (GET_CODE (op1) == ZERO_EXTEND) + *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p); + else + *cost += rtx_cost (op1, MINUS, 1, speed_p); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, + 0, speed_p); + return true; + } + else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND, + 0, speed_p) + + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p)); + return true; + } + else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + GET_CODE (XEXP (x, 1)), 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? */ + + *cost = LIBCALL_COST (2); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op0, mul_op1, add_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + add_op = XEXP (x, 1); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (add_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + /* Narrow modes can be synthesized in SImode, but the range + of useful sub-operations is limited. */ + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + return false; + } + + if (mode == SImode) + { + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + if (TARGET_INT_SIMD + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + /* UXTA[BH] or SXTA[BH]. 
*/ + if (speed_p) + *cost += extra_cost->alu.extnd_arith; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p)); + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16))) + || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1)) + && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16 + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && (INTVAL (XEXP (XEXP (mul_op, 1), 1)) + == 16)))))) + { + /* SMLA[BT][BT]. */ + if (speed_p) + *cost += extra_cost->mult[0].extend_add; + *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (mul_op, 1), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p) + + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && GET_CODE (XEXP (x, 0)) == MULT + && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend_add; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? 
*/ + *cost = LIBCALL_COST (2); + return false; + + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + rtx op0 = XEXP (x, 0); + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + shift_reg = NULL; + shift_op = shifter_op_p (op0, &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (code, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.logical; + *cost += rtx_cost (op0, code, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + *cost += (rtx_cost (op0, code, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (mode == DImode) + { + rtx op0 = XEXP (x, 0); + enum rtx_code subcode = GET_CODE (op0); + + *cost = COSTS_N_INSNS (2); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + if (GET_CODE (op0) == ZERO_EXTEND) + { + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + else if (GET_CODE (op0) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.logical + extra_cost->alu.log_shift; + + *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + return true; + } + /* Vector mode? */ + + *cost = LIBCALL_COST (2); + return false; + + case MULT: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + rtx op0 = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult; + + *cost += (rtx_cost (op0, MULT, 0, speed_p) + + rtx_cost (XEXP (x, 1), MULT, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && INTVAL (XEXP (XEXP (x, 1), 1)) == 16))) + || (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 16 + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && (INTVAL (XEXP (XEXP (x, 1), 1)) + == 16)))))) + { + /* SMUL[TB][TB]. 
*/ + if (speed_p) + *cost += extra_cost->mult[0].extend; + *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p)); + return true; + } + if (speed_p) + *cost += extra_cost->mult[0].simple; + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + ZERO_EXTEND, 0, speed_p)); + return true; + } + + *cost = LIBCALL_COST (2); + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (2); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + if (GET_CODE (XEXP (x, 0)) == ABS) + { + *cost = COSTS_N_INSNS (2); + /* Assume the non-flag-changing variant. */ + if (speed_p) + *cost += (extra_cost->alu.log_shift + + extra_cost->alu.arith_shift); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *cost = COSTS_N_INSNS (2); + /* No extra cost for MOV imm and MVN imm. */ + /* If the comparison op is using the flags, there's no further + cost, otherwise we need to add the cost of the comparison. */ + if (!(REG_P (XEXP (XEXP (x, 0), 0)) + && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM + && XEXP (XEXP (x, 0), 1) == const0_rtx)) + { + *cost += (COSTS_N_INSNS (1) + + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0, + speed_p) + + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1, + speed_p)); + if (speed_p) + *cost += extra_cost->alu.arith; + } + return true; + } + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + /* Slightly disparage, as we might need an extend operation. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case NOT: + if (mode == SImode) + { + rtx shift_op; + rtx shift_reg = NULL; + + *cost = COSTS_N_INSNS (1); + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + + if (shift_op) + { + if (shift_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + return false; + } + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + return false; + } + + /* Vector mode? 
*/ + + *cost += LIBCALL_COST (1); + return false; + + case IF_THEN_ELSE: + { + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *cost = COSTS_N_INSNS (4); + return true; + } + int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p); + int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p); + + *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p); + /* Assume that if one arm of the if_then_else is a register, + that it will be tied with the result and eliminate the + conditional insn. */ + if (REG_P (XEXP (x, 1))) + *cost += op2cost; + else if (REG_P (XEXP (x, 2))) + *cost += op1cost; + else + { + if (speed_p) + { + if (extra_cost->alu.non_exec_costs_exec) + *cost += op1cost + op2cost + extra_cost->alu.non_exec; + else + *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec; + } + else + *cost += op1cost + op2cost; + } + } + return true; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx) + *cost = 0; + else + { + enum machine_mode op0mode; + /* We'll mostly assume that the cost of a compare is the cost of the + LHS. However, there are some notable exceptions. */ + + /* Floating point compares are never done as side-effects. */ + op0mode = GET_MODE (XEXP (x, 0)); + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT + && (op0mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[op0mode != SFmode].compare; + + if (XEXP (x, 1) == CONST0_RTX (op0mode)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + /* DImode compares normally take two insns. */ + if (op0mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + if (op0mode == SImode) + { + rtx shift_op; + rtx shift_reg; + + if (XEXP (x, 1) == const0_rtx + && !(REG_P (XEXP (x, 0)) + || (GET_CODE (XEXP (x, 0)) == SUBREG + && REG_P (SUBREG_REG (XEXP (x, 0)))))) + { + *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + + /* Multiply operations that set the flags are often + significantly more expensive. */ + if (speed_p + && GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + + if (speed_p + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0), + 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + *cost = COSTS_N_INSNS (1); + if (shift_reg != NULL) + { + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + return true; + } + return false; + } + + /* Vector mode? 
*/ + + *cost = LIBCALL_COST (2); + return false; + } + return true; + + case EQ: + case NE: + case LT: + case LE: + case GT: + case GE: + case LTU: + case LEU: + case GEU: + case GTU: + case ORDERED: + case UNORDERED: + case UNEQ: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + case LTGT: + if (outer_code == SET) + { + /* Is it a store-flag operation? */ + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + /* Thumb also needs an IT insn. */ + *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2); + return true; + } + if (XEXP (x, 1) == const0_rtx) + { + switch (code) + { + case LT: + /* LSR Rd, Rn, #31. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + case EQ: + /* RSBS T1, Rn, #0 + ADC Rd, Rn, T1. */ + + case NE: + /* SUBS T1, Rn, #1 + SBC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + break; + + case LE: + /* RSBS T1, Rn, Rn, LSR #31 + ADC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.arith_shift; + break; + + case GT: + /* RSB Rd, Rn, Rn, ASR #1 + LSR Rd, Rd, #31. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += (extra_cost->alu.arith_shift + + extra_cost->alu.shift); + break; + + case GE: + /* ASR Rd, Rn, #31 + ADD Rd, Rn, #1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + default: + /* Remaining cases are either meaningless or would take + three insns anyway. */ + *cost = COSTS_N_INSNS (3); + break; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + else + { + *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + } + /* Not directly inside a set. If it involves the condition code + register it must be the condition for a branch, cond_exec or + I_T_E operation. Since the comparison is performed elsewhere + this is just the control part which has no additional + cost. */ + else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + *cost = 0; + return true; + } + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift; + return false; + } + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case SIGN_EXTEND: + if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); + + if (!speed_p) + return true; + + if (GET_MODE (XEXP (x, 0)) == SImode) + *cost += extra_cost->ldst.load; + else + *cost += extra_cost->ldst.load_sign_extend; + + if (mode == DImode) + *cost += extra_cost->alu.shift; + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have SXTB/SXTH. 
*/ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extnd; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + } + + return true; + + case ZERO_EXTEND: + if ((arm_arch4 + || GET_MODE (XEXP (x, 0)) == SImode + || GET_MODE (XEXP (x, 0)) == QImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) == QImode) + { + /* UXTB can be a shorter instruction in Thumb2, but it might + be slower than the AND Rd, Rn, #255 alternative. When + optimizing for speed it should never be slower to use + AND, and we don't really model 16-bit vs 32-bit insns + here. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.logical; + } + else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have UXTB/UXTH. */ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extnd; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. It's marginally preferable to use + shifts rather than two BIC instructions as the second + shift may merge with a subsequent insn as a shifter + op. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + } + + return true; + + case CONST_INT: + *cost = 0; + /* CONST_INT has no mode, so we cannot tell for sure how many + insns are really going to be needed. The best we can do is + look at the value passed. If it fits in SImode, then assume + that's the mode it will be used for. Otherwise assume it + will be used in DImode. */ + if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode)) + mode = SImode; + else + mode = DImode; + + /* Avoid blowing up in arm_gen_constant (). */ + if (!(outer_code == PLUS + || outer_code == AND + || outer_code == IOR + || outer_code == XOR + || outer_code == MINUS)) + outer_code = SET; + + const_int_cost: + if (mode == SImode) + { + *cost += 0; + *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x), NULL, NULL, + 0, 0)); + /* Extra costs? */ + } + else + { + *cost += COSTS_N_INSNS (arm_gen_constant + (outer_code, SImode, NULL, + trunc_int_for_mode (INTVAL (x), SImode), + NULL, NULL, 0, 0) + + arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x) >> 32, NULL, + NULL, 0, 0)); + /* Extra costs? */ + } + + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (speed_p) + { + if (arm_arch_thumb2 && !flag_pic) + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load; + } + else + *cost = COSTS_N_INSNS (2); + + if (flag_pic) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + } + + return true; + + case CONST_FIXED: + *cost = COSTS_N_INSNS (4); + /* Fixme. 
*/ + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + if (vfp3_const_double_rtx (x)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fpconst; + return true; + } + + if (speed_p) + { + *cost = COSTS_N_INSNS (1); + if (mode == DFmode) + *cost += extra_cost->ldst.loadd; + else + *cost += extra_cost->ldst.loadf; + } + else + *cost = COSTS_N_INSNS (2 + (mode == DFmode)); + + return true; + } + *cost = COSTS_N_INSNS (4); + return true; + + case CONST_VECTOR: + /* Fixme. */ + if (TARGET_NEON + && TARGET_HARD_FLOAT + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *cost = COSTS_N_INSNS (1); + else + *cost = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + *cost = COSTS_N_INSNS (1); + /* When optimizing for size, we prefer constant pool entries to + MOVW/MOVT pairs, so bump the cost of these slightly. */ + if (!speed_p) + *cost += 1; + return true; + + case CLZ: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.clz; + return false; + + case SMIN: + if (XEXP (x, 1) == const0_rtx) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Fall through. */ + case SMAX: + case UMIN: + case UMAX: + *cost = COSTS_N_INSNS (2); + return false; + + case TRUNCATE: + if (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) + || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND, + 0, speed_p)); + return true; + } + *cost = LIBCALL_COST (1); + return false; + + case UNSPEC: + return arm_unspec_cost (x, outer_code, speed_p, cost); + + case PC: + /* Reading the PC is like reading any other register. Writing it + is more expensive, but we take that into account elsewhere. */ + *cost = 0; + return true; + + case ZERO_EXTRACT: + /* TODO: Simple zero_extract of bottom bits using AND. */ + /* Fall through. */ + case SIGN_EXTRACT: + if (arm_arch6 + && mode == SImode + && CONST_INT_P (XEXP (x, 1)) + && CONST_INT_P (XEXP (x, 2))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Without UBFX/SBFX, need to resort to shift operations. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p); + return true; + + case FLOAT_EXTEND: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].widen; + if (!TARGET_FPU_ARMV8 + && GET_MODE (XEXP (x, 0)) == HFmode) + { + /* Pre v8, widening HF->DF is a two-step process, first + widening to SFmode. 
*/ + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[0].widen; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + *cost = LIBCALL_COST (1); + return false; + + case FLOAT_TRUNCATE: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].narrow; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + /* Vector modes? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FIX: + case UNSIGNED_FIX: + if (TARGET_HARD_FLOAT) + { + if (GET_MODE_CLASS (mode) == MODE_INT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint; + /* Strip of the 'cost' of rounding towards zero. */ + if (GET_CODE (XEXP (x, 0)) == FIX) + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p); + else + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + /* ??? Increase the cost to deal with transferring from + FP -> CORE registers? */ + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + && TARGET_FPU_ARMV8) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].roundint; + return false; + } + /* Vector costs? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FLOAT: + case UNSIGNED_FLOAT: + if (TARGET_HARD_FLOAT) + { + /* ??? Increase the cost to deal with transferring from CORE + -> FP registers? */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fromint; + return false; + } + *cost = LIBCALL_COST (1); + return false; + + case CALL: + *cost = COSTS_N_INSNS (1); + return true; + + case ASM_OPERANDS: + /* Just a guess. Cost one insn per input. */ + *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); + return true; + + default: + if (mode != VOIDmode) + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *cost = COSTS_N_INSNS (4); /* Who knows? */ + return false; + } +} + /* RTX costs when optimizing for size. */ static bool arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, int *total, bool speed) { - if (!speed) - return arm_size_rtx_costs (x, (enum rtx_code) code, - (enum rtx_code) outer_code, total); + bool result; + + if (TARGET_OLD_RTX_COSTS + || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS)) + { + /* Old way. (Deprecated.) */ + if (!speed) + result = arm_size_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total); + else + result = current_tune->rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total, + speed); + } else - return current_tune->rtx_costs (x, (enum rtx_code) code, + { + /* New way. */ + if (current_tune->insn_extra_cost) + result = arm_new_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + current_tune->insn_extra_cost, + total, speed); + /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS + && current_tune->insn_extra_cost != NULL */ + else + result = arm_new_rtx_costs (x, (enum rtx_code) code, (enum rtx_code) outer_code, - total, speed); + &generic_extra_costs, total, speed); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + print_rtl_single (dump_file, x); + fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", + *total, result ? "final" : "partial"); + } + return result; } /* RTX costs for cores with a slow MUL implementation. 
Thumb-2 is not diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index b9ae2b0..66e128e 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -243,6 +243,14 @@ mrestrict-it Target Report Var(arm_restrict_it) Init(2) Generate IT blocks appropriate for ARMv8. +mold-rtx-costs +Target Report Mask(OLD_RTX_COSTS) +Use the old RTX costing tables (transitional). + +mnew-generic-costs +Target Report Mask(NEW_GENERIC_COSTS) +Use the new generic RTX cost tables if new core-specific cost table not available (transitional). + mfix-cortex-m3-ldrd Target Report Var(fix_cm3_ldrd) Init(2) Avoid overlapping destination and address registers on LDRD instructions |