author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2013-10-03 13:58:42 +0000
committer  Kyrylo Tkachov <ktkachov@gcc.gnu.org>     2013-10-03 13:58:42 +0000
commit     5bea0c6c8c3deb0b52aec49434484b35e75293f2
tree       d5f7a0a26c792204a9e44ba13b0f5b1f3f440052
parent     24c569251d2a19a0a75af13d7fc85802ad574801
aarch-common-protos.h (struct alu_cost_table): New.
[gcc/]
2013-10-03  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
	    Richard Earnshaw  <richard.earnshaw@arm.com>

	* config/arm/aarch-common-protos.h (struct alu_cost_table): New.
	(struct mult_cost_table): Likewise.
	(struct mem_cost_table): Likewise.
	(struct fp_cost_table): Likewise.
	(struct vector_cost_table): Likewise.
	(cpu_cost_table): Likewise.
	* config/arm/arm.opt (mold-rtx-costs): New option.
	(mnew-generic-costs): Likewise.
	* config/arm/arm.c (generic_extra_costs): New table.
	(cortexa15_extra_costs): Likewise.
	(arm_slowmul_tune): Use NULL as new costs.
	(arm_fastmul_tune): Likewise.
	(arm_strongarm_tune): Likewise.
	(arm_xscale_tune): Likewise.
	(arm_9e_tune): Likewise.
	(arm_v6t2_tune): Likewise.
	(arm_cortex_a5_tune): Likewise.
	(arm_cortex_a9_tune): Likewise.
	(arm_v6m_tune): Likewise.
	(arm_fa726te_tune): Likewise.
	(arm_cortex_a15_tune): Use cortexa15_extra_costs.
	(arm_cortex_tune): Use generic_extra_costs.
	(shifter_op_p): New function.
	(arm_unspec_cost): Likewise.
	(LIBCALL_COST): Define.
	(arm_new_rtx_costs): New function.
	(arm_rtx_costs): Use arm_new_rtx_costs when core-specific
	table is available.  Use old costs otherwise unless mnew-generic-costs
	is specified.
	* config/arm/arm-protos.h (tune_params): Add insn_extra_cost field.
	(cpu_cost_table): Declare.

Co-Authored-By: Richard Earnshaw <rearnsha@arm.com>

From-SVN: r203160
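In outline: each core can now supply a cpu_cost_table through the new tune_params::insn_extra_cost field, and arm_new_rtx_costs adds the relevant table entry on top of the base COSTS_N_INSNS count when costing for speed. A minimal sketch of that pattern, assuming GCC's internal headers and the structures added by this patch (arith_shift_cost is an illustrative helper, not a function from the patch):

/* Illustrative only: mirrors the pattern used throughout arm_new_rtx_costs.
   EXTRA_COST would be current_tune->insn_extra_cost, e.g.
   &cortexa15_extra_costs.  */
static int
arith_shift_cost (const struct cpu_cost_table *extra_cost, bool speed_p)
{
  /* Base cost: a single instruction, e.g. ADD r0, r1, r2, LSL #2.  */
  int cost = COSTS_N_INSNS (1);

  /* The tables hold the cost *over* the fastest instruction, so zero
     entries are legitimate; only speed tuning consults them.  */
  if (speed_p)
    cost += extra_cost->alu.arith_shift;

  return cost;
}

Cores without a table keep NULL in insn_extra_cost and stay on the old per-core cost hooks, unless -mnew-generic-costs routes them to generic_extra_costs.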
-rw-r--r--  gcc/ChangeLog                             35
-rw-r--r--  gcc/config/arm/aarch-common-protos.h     101
-rw-r--r--  gcc/config/arm/arm-protos.h                3
-rw-r--r--  gcc/config/arm/arm.c                    1912
-rw-r--r--  gcc/config/arm/arm.opt                     8
5 files changed, 2052 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fd3903b..915d769 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2013-10-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+ Richard Earnshaw <richard.earnshaw@arm.com>
+
+ * config/arm/aarch-common-protos.h (struct alu_cost_table): New.
+ (struct mult_cost_table): Likewise.
+ (struct mem_cost_table): Likewise.
+ (struct fp_cost_table): Likewise.
+ (struct vector_cost_table): Likewise.
+ (cpu_cost_table): Likewise.
+ * config/arm/arm.opt (mold-rtx-costs): New option.
+ (mnew-generic-costs): Likewise.
+ * config/arm/arm.c (generic_extra_costs): New table.
+ (cortexa15_extra_costs): Likewise.
+ (arm_slowmul_tune): Use NULL as new costs.
+ (arm_fastmul_tune): Likewise.
+ (arm_strongarm_tune): Likewise.
+ (arm_xscale_tune): Likewise.
+ (arm_9e_tune): Likewise.
+ (arm_v6t2_tune): Likewise.
+ (arm_cortex_a5_tune): Likewise.
+ (arm_cortex_a9_tune): Likewise.
+ (arm_v6m_tune): Likewise.
+ (arm_fa726te_tune): Likewise.
+ (arm_cortex_a15_tune): Use cortexa15_extra_costs.
+ (arm_cortex_tune): Use generic_extra_costs.
+ (shifter_op_p): New function.
+ (arm_unspec_cost): Likewise.
+ (LIBCALL_COST): Define.
+ (arm_new_rtx_costs): New function.
+ (arm_rtx_costs): Use arm_new_rtx_costs when core-specific
+ table is available. Use old costs otherwise unless mnew-generic-costs
+ is specified.
+ * config/arm/arm-protos.h (tune_params): Add insn_extra_cost field.
+ (cpu_cost_table): Declare.
+
2013-10-03 Marcus Shawcroft <marcus.shawcroft@arm.com>
PR target/58460
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 97768fc..841f544 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -1,5 +1,4 @@
-/* Function prototypes for instruction scheduling dependeoncy routines,
- defined in aarch-common.c
+/* Functions and structures shared between arm and aarch64.
Copyright (C) 1991-2013 Free Software Foundation, Inc.
Contributed by ARM Ltd.
@@ -33,4 +32,102 @@ extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
+/* RTX cost table definitions. These are used when tuning for speed rather
+ than for size and should reflect the _additional_ cost over the cost
+ of the fastest instruction in the machine, which is COSTS_N_INSNS (1).
+ Therefore it's okay for some costs to be 0.
+ Costs may not have a negative value. */
+struct alu_cost_table
+{
+ const int arith; /* ADD/SUB. */
+ const int logical; /* AND/ORR/EOR/BIC, etc. */
+ const int shift; /* Simple shift. */
+ const int shift_reg; /* Simple shift by reg. */
+ const int arith_shift; /* Additional when arith also shifts... */
+ const int arith_shift_reg; /* ... and when the shift is by a reg. */
+ const int log_shift; /* Additional when logic also shifts... */
+ const int log_shift_reg; /* ... and when the shift is by a reg. */
+ const int extnd; /* Zero/sign extension. */
+ const int extnd_arith; /* Extend and arith. */
+ const int bfi; /* Bit-field insert. */
+ const int bfx; /* Bit-field extraction. */
+ const int clz; /* Count Leading Zeros. */
+ const int non_exec; /* Extra cost when not executing insn. */
+ const bool non_exec_costs_exec; /* True if non-execution must add the exec
+ cost. */
+};
+
+struct mult_cost_table
+{
+ const int simple;
+ const int flag_setting; /* Additional cost if multiply sets flags. */
+ const int extend;
+ const int add;
+ const int extend_add;
+ const int idiv;
+};
+
+/* Calculations of LDM costs are complex. We assume an initial cost
+ (ldm_1st) which will load the number of registers mentioned in
+ ldm_regs_per_insn_1st registers; then each additional
+ ldm_regs_per_insn_subsequent registers cost one more insn.
+ Similarly for STM operations.
+ Therefore the ldm_regs_per_insn_1st/stm_regs_per_insn_1st and
+ ldm_regs_per_insn_subsequent/stm_regs_per_insn_subsequent fields indicate
+ the number of registers loaded/stored and are expressed by a simple integer
+ and not by a COSTS_N_INSNS (N) expression.
+ */
+struct mem_cost_table
+{
+ const int load;
+ const int load_sign_extend; /* Additional to load cost. */
+ const int ldrd; /* Cost of LDRD. */
+ const int ldm_1st;
+ const int ldm_regs_per_insn_1st;
+ const int ldm_regs_per_insn_subsequent;
+ const int loadf; /* SFmode. */
+ const int loadd; /* DFmode. */
+ const int load_unaligned; /* Extra for unaligned loads. */
+ const int store;
+ const int strd;
+ const int stm_1st;
+ const int stm_regs_per_insn_1st;
+ const int stm_regs_per_insn_subsequent;
+ const int storef; /* SFmode. */
+ const int stored; /* DFmode. */
+ const int store_unaligned; /* Extra for unaligned stores. */
+};
+
+struct fp_cost_table
+{
+ const int div;
+ const int mult;
+ const int mult_addsub; /* Non-fused. */
+ const int fma; /* Fused. */
+ const int addsub;
+ const int fpconst; /* Immediate. */
+ const int neg; /* NEG and ABS. */
+ const int compare;
+ const int widen; /* Widen to this size. */
+ const int narrow; /* Narrow from this size. */
+ const int toint;
+ const int fromint;
+ const int roundint; /* V8 round to integral, remains FP format. */
+};
+
+struct vector_cost_table
+{
+ const int alu;
+};
+
+struct cpu_cost_table
+{
+ const struct alu_cost_table alu;
+ const struct mult_cost_table mult[2]; /* SImode and DImode. */
+ const struct mem_cost_table ldst;
+ const struct fp_cost_table fp[2]; /* SFmode and DFmode. */
+ const struct vector_cost_table vect;
+};
+
+
#endif /* GCC_AARCH_COMMON_PROTOS_H */
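The LDM/STM formula documented above is easiest to see with concrete numbers. A self-contained sketch of that arithmetic (illustrative values only; COSTS_N_INSNS is reproduced here with GCC's usual scale of 4 cost units per instruction):

#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)   /* Same scale as GCC's rtl.h definition.  */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Cost of loading NREGS registers, following the formula in the
   mem_cost_table comment: LDM_1ST is already a COSTS_N_INSNS value,
   while the regs-per-insn parameters are plain register counts.  */
static int
ldm_cost (int nregs, int ldm_1st,
          int regs_per_insn_1st, int regs_per_insn_subsequent)
{
  return ldm_1st
         + COSTS_N_INSNS ((MAX (nregs - regs_per_insn_1st, 0)
                           + regs_per_insn_subsequent - 1)
                          / regs_per_insn_subsequent);
}

int
main (void)
{
  /* Made-up core: the first insn covers one register, each further insn
     covers two.  Loading five registers is ldm_1st plus two more insns,
     i.e. 8 + 8 = 16 cost units here.  */
  printf ("%d\n", ldm_cost (5, COSTS_N_INSNS (2), 1, 2));
  return 0;
}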
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f694dfd..944cf10 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -246,9 +246,12 @@ struct cpu_vec_costs {
#ifdef RTX_CODE
/* This needs to be here because we need RTX_CODE and similar. */
+struct cpu_cost_table;
+
struct tune_params
{
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
+ const struct cpu_cost_table *insn_extra_cost;
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
int constant_limit;
/* Maximum number of instructions to conditionalise. */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index f0a7dea..42f3f47 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -945,10 +945,212 @@ struct cpu_vec_costs arm_default_vec_cost = {
1, /* cond_not_taken_branch_cost. */
};
+
+const struct cpu_cost_table generic_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* Arith. */
+ 0, /* Logical. */
+ 0, /* Shift. */
+ COSTS_N_INSNS (1), /* Shift_reg. */
+ 0, /* Arith_shift. */
+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
+ 0, /* Log_shift. */
+ COSTS_N_INSNS (1), /* Log_shift_reg. */
+ 0, /* Extend. */
+ COSTS_N_INSNS (1), /* Extend_arith. */
+ 0, /* Bfi. */
+ 0, /* Bfx. */
+ 0, /* Clz. */
+ COSTS_N_INSNS (1), /* non_exec. */
+ false /* non_exec_costs_exec. */
+ },
+ /* MULT SImode */
+ {
+ {
+ COSTS_N_INSNS (2), /* Simple. */
+ COSTS_N_INSNS (1), /* Flag_setting. */
+ COSTS_N_INSNS (2), /* Extend. */
+ COSTS_N_INSNS (3), /* Add. */
+ COSTS_N_INSNS (3), /* Extend_add. */
+ COSTS_N_INSNS (8) /* Idiv. */
+ },
+ /* MULT DImode */
+ {
+ 0, /* Simple (N/A). */
+ 0, /* Flag_setting (N/A). */
+ COSTS_N_INSNS (2), /* Extend. */
+ 0, /* Add (N/A). */
+ COSTS_N_INSNS (3), /* Extend_add. */
+ 0 /* Idiv (N/A). */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (2), /* Load. */
+ COSTS_N_INSNS (2), /* Load_sign_extend. */
+ COSTS_N_INSNS (3), /* Ldrd. */
+ COSTS_N_INSNS (2), /* Ldm_1st. */
+ 1, /* Ldm_regs_per_insn_1st. */
+ 1, /* Ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (2), /* Loadf. */
+ COSTS_N_INSNS (3), /* Loadd. */
+ COSTS_N_INSNS (1), /* Load_unaligned. */
+ COSTS_N_INSNS (2), /* Store. */
+ COSTS_N_INSNS (3), /* Strd. */
+ COSTS_N_INSNS (2), /* Stm_1st. */
+ 1, /* Stm_regs_per_insn_1st. */
+ 1, /* Stm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (2), /* Storef. */
+ COSTS_N_INSNS (3), /* Stored. */
+ COSTS_N_INSNS (1) /* Store_unaligned. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (7), /* Div. */
+ COSTS_N_INSNS (2), /* Mult. */
+ COSTS_N_INSNS (3), /* Mult_addsub. */
+ COSTS_N_INSNS (3), /* Fma. */
+ COSTS_N_INSNS (1), /* Addsub. */
+ 0, /* Fpconst. */
+ 0, /* Neg. */
+ 0, /* Compare. */
+ 0, /* Widen. */
+ 0, /* Narrow. */
+ 0, /* Toint. */
+ 0, /* Fromint. */
+ 0 /* Roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (15), /* Div. */
+ COSTS_N_INSNS (5), /* Mult. */
+ COSTS_N_INSNS (7), /* Mult_addsub. */
+ COSTS_N_INSNS (7), /* Fma. */
+ COSTS_N_INSNS (3), /* Addsub. */
+ 0, /* Fpconst. */
+ 0, /* Neg. */
+ 0, /* Compare. */
+ 0, /* Widen. */
+ 0, /* Narrow. */
+ 0, /* Toint. */
+ 0, /* Fromint. */
+ 0 /* Roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* Alu. */
+ }
+};
+
+const struct cpu_cost_table cortexa15_extra_costs =
+{
+ /* ALU */
+ {
+ COSTS_N_INSNS (1), /* Arith. */
+ COSTS_N_INSNS (1), /* Logical. */
+ COSTS_N_INSNS (1), /* Shift. */
+ COSTS_N_INSNS (1), /* Shift_reg. */
+ COSTS_N_INSNS (1), /* Arith_shift. */
+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
+ COSTS_N_INSNS (1), /* Log_shift. */
+ COSTS_N_INSNS (1), /* Log_shift_reg. */
+ COSTS_N_INSNS (1), /* Extend. */
+ COSTS_N_INSNS (2), /* Extend_arith. */
+ COSTS_N_INSNS (2), /* Bfi. */
+ COSTS_N_INSNS (1), /* Bfx. */
+ COSTS_N_INSNS (1), /* Clz. */
+ COSTS_N_INSNS (1), /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+ /* MULT SImode */
+ {
+ {
+ COSTS_N_INSNS (3), /* Simple. */
+ COSTS_N_INSNS (4), /* Flag_setting. */
+ COSTS_N_INSNS (3), /* Extend. */
+ COSTS_N_INSNS (4), /* Add. */
+ COSTS_N_INSNS (4), /* Extend_add. */
+ COSTS_N_INSNS (19) /* Idiv. */
+ },
+ /* MULT DImode */
+ {
+ 0, /* Simple (N/A). */
+ 0, /* Flag_setting (N/A). */
+ COSTS_N_INSNS (4), /* Extend. */
+ 0, /* Add (N/A). */
+ COSTS_N_INSNS (6), /* Extend_add. */
+ 0 /* Idiv (N/A). */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (4), /* Load. */
+ COSTS_N_INSNS (4), /* Load_sign_extend. */
+ COSTS_N_INSNS (4), /* Ldrd. */
+ COSTS_N_INSNS (5), /* Ldm_1st. */
+ 1, /* Ldm_regs_per_insn_1st. */
+ 2, /* Ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (5), /* Loadf. */
+ COSTS_N_INSNS (5), /* Loadd. */
+ COSTS_N_INSNS (1), /* Load_unaligned. */
+ COSTS_N_INSNS (1), /* Store. */
+ COSTS_N_INSNS (1), /* Strd. */
+ COSTS_N_INSNS (2), /* Stm_1st. */
+ 1, /* Stm_regs_per_insn_1st. */
+ 2, /* Stm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (1), /* Storef. */
+ COSTS_N_INSNS (1), /* Stored. */
+ COSTS_N_INSNS (1) /* Store_unaligned. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (18), /* Div. */
+ COSTS_N_INSNS (5), /* Mult. */
+ COSTS_N_INSNS (3), /* Mult_addsub. */
+ COSTS_N_INSNS (13), /* Fma. */
+ COSTS_N_INSNS (5), /* Addsub. */
+ COSTS_N_INSNS (5), /* Fpconst. */
+ COSTS_N_INSNS (3), /* Neg. */
+ COSTS_N_INSNS (3), /* Compare. */
+ COSTS_N_INSNS (3), /* Widen. */
+ COSTS_N_INSNS (3), /* Narrow. */
+ COSTS_N_INSNS (3), /* Toint. */
+ COSTS_N_INSNS (3), /* Fromint. */
+ COSTS_N_INSNS (3) /* Roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (32), /* Div. */
+ COSTS_N_INSNS (5), /* Mult. */
+ COSTS_N_INSNS (3), /* Mult_addsub. */
+ COSTS_N_INSNS (13), /* Fma. */
+ COSTS_N_INSNS (5), /* Addsub. */
+ COSTS_N_INSNS (3), /* Fpconst. */
+ COSTS_N_INSNS (3), /* Neg. */
+ COSTS_N_INSNS (3), /* Compare. */
+ COSTS_N_INSNS (3), /* Widen. */
+ COSTS_N_INSNS (3), /* Narrow. */
+ COSTS_N_INSNS (3), /* Toint. */
+ COSTS_N_INSNS (3), /* Fromint. */
+ COSTS_N_INSNS (3) /* Roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* Alu. */
+ }
+};
+
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL,
+ NULL,
3, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -964,6 +1166,7 @@ const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -982,6 +1185,7 @@ const struct tune_params arm_strongarm_tune =
{
arm_fastmul_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -996,6 +1200,7 @@ const struct tune_params arm_strongarm_tune =
const struct tune_params arm_xscale_tune =
{
arm_xscale_rtx_costs,
+ NULL,
xscale_sched_adjust_cost,
2, /* Constant limit. */
3, /* Max cond insns. */
@@ -1012,6 +1217,7 @@ const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1027,6 +1233,7 @@ const struct tune_params arm_v6t2_tune =
{
arm_9e_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1042,6 +1249,7 @@ const struct tune_params arm_v6t2_tune =
const struct tune_params arm_cortex_tune =
{
arm_9e_rtx_costs,
+ &generic_extra_costs,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -1057,6 +1265,7 @@ const struct tune_params arm_cortex_tune =
const struct tune_params arm_cortex_a15_tune =
{
arm_9e_rtx_costs,
+ &cortexa15_extra_costs,
NULL,
1, /* Constant limit. */
2, /* Max cond insns. */
@@ -1076,6 +1285,7 @@ const struct tune_params arm_cortex_a5_tune =
{
arm_9e_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
1, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1090,6 +1300,7 @@ const struct tune_params arm_cortex_a5_tune =
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
+ NULL,
cortex_a9_sched_adjust_cost,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -1108,6 +1319,7 @@ const struct tune_params arm_v6m_tune =
{
arm_9e_rtx_costs,
NULL,
+ NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1122,6 +1334,7 @@ const struct tune_params arm_v6m_tune =
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
+ NULL,
fa726te_sched_adjust_cost,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -8291,18 +8504,1707 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
}
}
+/* Helper function for arm_rtx_costs. If the operand is a valid shift
+ operand, then return the operand that is being shifted. If the shift
+ is not by a constant, then set SHIFT_REG to point to the operand.
+ Return NULL if OP is not a shifter operand. */
+static rtx
+shifter_op_p (rtx op, rtx *shift_reg)
+{
+ enum rtx_code code = GET_CODE (op);
+
+ if (code == MULT && CONST_INT_P (XEXP (op, 1))
+ && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
+ return XEXP (op, 0);
+ else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
+ return XEXP (op, 0);
+ else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
+ || code == ASHIFTRT)
+ {
+ if (!CONST_INT_P (XEXP (op, 1)))
+ *shift_reg = XEXP (op, 1);
+ return XEXP (op, 0);
+ }
+
+ return NULL;
+}
+
+static bool
+arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
+{
+ const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
+ gcc_assert (GET_CODE (x) == UNSPEC);
+
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_UNALIGNED_LOAD:
+ /* We can only do unaligned loads into the integer unit, and we can't
+ use LDM or LDRD. */
+ *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
+ if (speed_p)
+ *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
+ + extra_cost->ldst.load_unaligned);
+
+#ifdef NOT_YET
+ *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
+ ADDR_SPACE_GENERIC, speed_p);
+#endif
+ return true;
+
+ case UNSPEC_UNALIGNED_STORE:
+ *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
+ if (speed_p)
+ *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
+ + extra_cost->ldst.store_unaligned);
+
+ *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
+#ifdef NOT_YET
+ *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
+ ADDR_SPACE_GENERIC, speed_p);
+#endif
+ return true;
+
+ case UNSPEC_VRINTZ:
+ case UNSPEC_VRINTP:
+ case UNSPEC_VRINTM:
+ case UNSPEC_VRINTR:
+ case UNSPEC_VRINTX:
+ case UNSPEC_VRINTA:
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
+
+ return true;
+ default:
+ *cost = COSTS_N_INSNS (2);
+ break;
+ }
+ return false;
+}
+
+/* Cost of a libcall. We assume one insn per argument, an amount for the
+ call (one insn for -Os) and then one for processing the result. */
+#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
+
+/* RTX costs. Make an estimate of the cost of executing the operation
+ X, which is contained within an operation with code OUTER_CODE.
+ SPEED_P indicates whether the cost desired is the performance cost,
+ or the size cost. The estimate is stored in COST and the return
+ value is TRUE if the cost calculation is final, or FALSE if the
+ caller should recurse through the operands of X to add additional
+ costs.
+
+ We currently make no attempt to model the size savings of Thumb-2
+ 16-bit instructions. At the normal points in compilation where
+ this code is called we have no measure of whether the condition
+ flags are live or not, and thus no realistic way to determine what
+ the size will eventually be. */
+static bool
+arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ const struct cpu_cost_table *extra_cost,
+ int *cost, bool speed_p)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (TARGET_THUMB1)
+ {
+ if (speed_p)
+ *cost = thumb1_rtx_costs (x, code, outer_code);
+ else
+ *cost = thumb1_size_rtx_costs (x, code, outer_code);
+ return true;
+ }
+
+ switch (code)
+ {
+ case SET:
+ *cost = 0;
+ if (REG_P (SET_SRC (x))
+ && REG_P (SET_DEST (x)))
+ {
+ /* Assume that most copies can be done with a single insn,
+ unless we don't have HW FP, in which case everything
+ larger than word mode will require two insns. */
+ *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
+ && GET_MODE_SIZE (mode) > 4)
+ || mode == DImode)
+ ? 2 : 1);
+ /* Conditional register moves can be encoded
+ in 16 bits in Thumb mode. */
+ if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
+ *cost >>= 1;
+ }
+
+ if (CONST_INT_P (SET_SRC (x)))
+ {
+ /* Handle CONST_INT here, since the value doesn't have a mode
+ and we would otherwise be unable to work out the true cost. */
+ *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
+ mode = GET_MODE (SET_DEST (x));
+ outer_code = SET;
+ /* Slightly lower the cost of setting a core reg to a constant.
+ This helps break up chains and allows for better scheduling. */
+ if (REG_P (SET_DEST (x))
+ && REGNO (SET_DEST (x)) <= LR_REGNUM)
+ *cost -= 1;
+ x = SET_SRC (x);
+ /* Immediate moves with an immediate in the range [0, 255] can be
+ encoded in 16 bits in Thumb mode. */
+ if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
+ && INTVAL (x) >= 0 && INTVAL (x) <=255)
+ *cost >>= 1;
+ goto const_int_cost;
+ }
+
+ return false;
+
+ case MEM:
+ /* A memory access costs 1 insn if the mode is small, or the address is
+ a single register, otherwise it costs one insn per word. */
+ if (REG_P (XEXP (x, 0)))
+ *cost = COSTS_N_INSNS (1);
+ else if (flag_pic
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
+ /* This will be split into two instructions.
+ See arm.md:calculate_pic_address. */
+ *cost = COSTS_N_INSNS (2);
+ else
+ *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+
+ /* For speed optimizations, add the costs of the address and
+ accessing memory. */
+ if (speed_p)
+#ifdef NOT_YET
+ *cost += (extra_cost->ldst.load
+ + arm_address_cost (XEXP (x, 0), mode,
+ ADDR_SPACE_GENERIC, speed_p));
+#else
+ *cost += extra_cost->ldst.load;
+#endif
+ return true;
+
+ case PARALLEL:
+ {
+ /* Calculations of LDM costs are complex. We assume an initial cost
+ (ldm_1st) which will load the number of registers mentioned in
+ ldm_regs_per_insn_1st registers; then each additional
+ ldm_regs_per_insn_subsequent registers cost one more insn. The
+ formula for N regs is thus:
+
+ ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
+ + ldm_regs_per_insn_subsequent - 1)
+ / ldm_regs_per_insn_subsequent).
+
+ Additional costs may also be added for addressing. A similar
+ formula is used for STM. */
+
+ bool is_ldm = load_multiple_operation (x, SImode);
+ bool is_stm = store_multiple_operation (x, SImode);
+
+ *cost = COSTS_N_INSNS (1);
+
+ if (is_ldm || is_stm)
+ {
+ if (speed_p)
+ {
+ HOST_WIDE_INT nregs = XVECLEN (x, 0);
+ HOST_WIDE_INT regs_per_insn_1st = is_ldm
+ ? extra_cost->ldst.ldm_regs_per_insn_1st
+ : extra_cost->ldst.stm_regs_per_insn_1st;
+ HOST_WIDE_INT regs_per_insn_sub = is_ldm
+ ? extra_cost->ldst.ldm_regs_per_insn_subsequent
+ : extra_cost->ldst.stm_regs_per_insn_subsequent;
+
+ *cost += regs_per_insn_1st
+ + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
+ + regs_per_insn_sub - 1)
+ / regs_per_insn_sub);
+ return true;
+ }
+
+ }
+ return false;
+ }
+ case DIV:
+ case UDIV:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ *cost = COSTS_N_INSNS (speed_p
+ ? extra_cost->fp[mode != SFmode].div : 1);
+ else if (mode == SImode && TARGET_IDIV)
+ *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
+ else
+ *cost = LIBCALL_COST (2);
+ return false; /* All arguments must be in registers. */
+
+ case MOD:
+ case UMOD:
+ *cost = LIBCALL_COST (2);
+ return false; /* All arguments must be in registers. */
+
+ case ROTATE:
+ if (mode == SImode && REG_P (XEXP (x, 1)))
+ {
+ *cost = (COSTS_N_INSNS (2)
+ + rtx_cost (XEXP (x, 0), code, 0, speed_p));
+ if (speed_p)
+ *cost += extra_cost->alu.shift_reg;
+ return true;
+ }
+ /* Fall through */
+ case ROTATERT:
+ case ASHIFT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
+ {
+ *cost = (COSTS_N_INSNS (3)
+ + rtx_cost (XEXP (x, 0), code, 0, speed_p));
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.shift;
+ return true;
+ }
+ else if (mode == SImode)
+ {
+ *cost = (COSTS_N_INSNS (1)
+ + rtx_cost (XEXP (x, 0), code, 0, speed_p));
+ /* Slightly disparage register shifts at -Os, but not by much. */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ *cost += (speed_p ? extra_cost->alu.shift_reg : 1
+ + rtx_cost (XEXP (x, 1), code, 1, speed_p));
+ return true;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ {
+ if (code == ASHIFT)
+ {
+ *cost = (COSTS_N_INSNS (1)
+ + rtx_cost (XEXP (x, 0), code, 0, speed_p));
+ /* Slightly disparage register shifts at -Os, but not by
+ much. */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ *cost += (speed_p ? extra_cost->alu.shift_reg : 1
+ + rtx_cost (XEXP (x, 1), code, 1, speed_p));
+ }
+ else if (code == LSHIFTRT || code == ASHIFTRT)
+ {
+ if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
+ {
+ /* Can use SBFX/UBFX. */
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.bfx;
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ }
+ else
+ {
+ *cost = COSTS_N_INSNS (2);
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ *cost += 2 * extra_cost->alu.shift;
+ else
+ *cost += (extra_cost->alu.shift
+ + extra_cost->alu.shift_reg);
+ }
+ else
+ /* Slightly disparage register shifts. */
+ *cost += !CONST_INT_P (XEXP (x, 1));
+ }
+ }
+ else /* Rotates. */
+ {
+ *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ *cost += (2 * extra_cost->alu.shift
+ + extra_cost->alu.log_shift);
+ else
+ *cost += (extra_cost->alu.shift
+ + extra_cost->alu.shift_reg
+ + extra_cost->alu.log_shift_reg);
+ }
+ }
+ return true;
+ }
+
+ *cost = LIBCALL_COST (2);
+ return false;
+
+ case MINUS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ || GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ rtx mul_op0, mul_op1, sub_op;
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].mult_addsub;
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ mul_op0 = XEXP (XEXP (x, 0), 0);
+ mul_op1 = XEXP (XEXP (x, 0), 1);
+ sub_op = XEXP (x, 1);
+ }
+ else
+ {
+ mul_op0 = XEXP (XEXP (x, 1), 0);
+ mul_op1 = XEXP (XEXP (x, 1), 1);
+ sub_op = XEXP (x, 0);
+ }
+
+ /* The first operand of the multiply may be optionally
+ negated. */
+ if (GET_CODE (mul_op0) == NEG)
+ mul_op0 = XEXP (mul_op0, 0);
+
+ *cost += (rtx_cost (mul_op0, code, 0, speed_p)
+ + rtx_cost (mul_op1, code, 0, speed_p)
+ + rtx_cost (sub_op, code, 0, speed_p));
+
+ return true;
+ }
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].addsub;
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ rtx shift_by_reg = NULL;
+ rtx shift_op;
+ rtx non_shift_op;
+
+ *cost = COSTS_N_INSNS (1);
+
+ shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
+ if (shift_op == NULL)
+ {
+ shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
+ non_shift_op = XEXP (x, 0);
+ }
+ else
+ non_shift_op = XEXP (x, 1);
+
+ if (shift_op != NULL)
+ {
+ if (shift_by_reg != NULL)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.arith_shift_reg;
+ *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
+ }
+ else if (speed_p)
+ *cost += extra_cost->alu.arith_shift;
+
+ *cost += (rtx_cost (shift_op, code, 0, speed_p)
+ + rtx_cost (non_shift_op, code, 0, speed_p));
+ return true;
+ }
+
+ if (arm_arch_thumb2
+ && GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ /* MLS. */
+ if (speed_p)
+ *cost += extra_cost->mult[0].add;
+ *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
+ return true;
+ }
+
+ if (CONST_INT_P (XEXP (x, 0)))
+ {
+ int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
+ INTVAL (XEXP (x, 0)), NULL_RTX,
+ NULL_RTX, 1, 0);
+ *cost = COSTS_N_INSNS (insns);
+ if (speed_p)
+ *cost += insns * extra_cost->alu.arith;
+ *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ {
+ /* Slightly disparage, as we might need to widen the result. */
+ *cost = 1 + COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+
+ if (CONST_INT_P (XEXP (x, 0)))
+ {
+ *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ *cost = COSTS_N_INSNS (2);
+
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
+ {
+ rtx op1 = XEXP (x, 1);
+
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.arith;
+
+ if (GET_CODE (op1) == ZERO_EXTEND)
+ *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
+ else
+ *cost += rtx_cost (op1, MINUS, 1, speed_p);
+ *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
+ 0, speed_p);
+ return true;
+ }
+ else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
+ *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
+ 0, speed_p)
+ + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
+ return true;
+ }
+ else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
+ {
+ if (speed_p)
+ *cost += (extra_cost->alu.arith
+ + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ ? extra_cost->alu.arith
+ : extra_cost->alu.arith_shift));
+ *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (x, 1), 0),
+ GET_CODE (XEXP (x, 1)), 0, speed_p));
+ return true;
+ }
+
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.arith;
+ return false;
+ }
+
+ /* Vector mode? */
+
+ *cost = LIBCALL_COST (2);
+ return false;
+
+ case PLUS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ rtx mul_op0, mul_op1, add_op;
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].mult_addsub;
+
+ mul_op0 = XEXP (XEXP (x, 0), 0);
+ mul_op1 = XEXP (XEXP (x, 0), 1);
+ add_op = XEXP (x, 1);
+
+ *cost += (rtx_cost (mul_op0, code, 0, speed_p)
+ + rtx_cost (mul_op1, code, 0, speed_p)
+ + rtx_cost (add_op, code, 0, speed_p));
+
+ return true;
+ }
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].addsub;
+ return false;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ *cost = LIBCALL_COST (2);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ {
+ /* Narrow modes can be synthesized in SImode, but the range
+ of useful sub-operations is limited. */
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
+ INTVAL (XEXP (x, 1)), NULL_RTX,
+ NULL_RTX, 1, 0);
+ *cost = COSTS_N_INSNS (insns);
+ if (speed_p)
+ *cost += insns * extra_cost->alu.arith;
+ /* Slightly penalize a narrow operation as the result may
+ need widening. */
+ *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
+ return true;
+ }
+
+ /* Slightly penalize a narrow operation as the result may
+ need widening. */
+ *cost = 1 + COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ rtx shift_op, shift_reg;
+
+ *cost = COSTS_N_INSNS (1);
+ if (TARGET_INT_SIMD
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
+ {
+ /* UXTA[BH] or SXTA[BH]. */
+ if (speed_p)
+ *cost += extra_cost->alu.extnd_arith;
+ *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
+ speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
+ return true;
+ }
+
+ shift_reg = NULL;
+ shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
+ if (shift_op != NULL)
+ {
+ if (shift_reg)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.arith_shift_reg;
+ *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
+ }
+ else if (speed_p)
+ *cost += extra_cost->alu.arith_shift;
+
+ *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
+ return true;
+ }
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ rtx mul_op = XEXP (x, 0);
+
+ *cost = COSTS_N_INSNS (1);
+
+ if (TARGET_DSP_MULTIPLY
+ && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
+ && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
+ || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
+ && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
+ || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
+ && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
+ && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
+ || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
+ && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
+ == 16))))))
+ {
+ /* SMLA[BT][BT]. */
+ if (speed_p)
+ *cost += extra_cost->mult[0].extend_add;
+ *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
+ SIGN_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
+ SIGN_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
+ return true;
+ }
+
+ if (speed_p)
+ *cost += extra_cost->mult[0].add;
+ *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
+ + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
+ return true;
+ }
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
+ INTVAL (XEXP (x, 1)), NULL_RTX,
+ NULL_RTX, 1, 0);
+ *cost = COSTS_N_INSNS (insns);
+ if (speed_p)
+ *cost += insns * extra_cost->alu.arith;
+ *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
+ return true;
+ }
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ if (arm_arch3m
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->mult[1].extend_add;
+ *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ ZERO_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
+ ZERO_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
+ return true;
+ }
+
+ *cost = COSTS_N_INSNS (2);
+
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+ {
+ if (speed_p)
+ *cost += (extra_cost->alu.arith
+ + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ ? extra_cost->alu.arith
+ : extra_cost->alu.arith_shift));
+
+ *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
+ speed_p)
+ + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
+ return true;
+ }
+
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.arith;
+ return false;
+ }
+
+ /* Vector mode? */
+ *cost = LIBCALL_COST (2);
+ return false;
+
+ case AND: case XOR: case IOR:
+ if (mode == SImode)
+ {
+ enum rtx_code subcode = GET_CODE (XEXP (x, 0));
+ rtx op0 = XEXP (x, 0);
+ rtx shift_op, shift_reg;
+
+ *cost = COSTS_N_INSNS (1);
+
+ if (subcode == NOT
+ && (code == AND
+ || (code == IOR && TARGET_THUMB2)))
+ op0 = XEXP (op0, 0);
+
+ shift_reg = NULL;
+ shift_op = shifter_op_p (op0, &shift_reg);
+ if (shift_op != NULL)
+ {
+ if (shift_reg)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.log_shift_reg;
+ *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
+ }
+ else if (speed_p)
+ *cost += extra_cost->alu.log_shift;
+
+ *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), code, 1, speed_p));
+ return true;
+ }
+
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ int insns = arm_gen_constant (code, SImode, NULL_RTX,
+ INTVAL (XEXP (x, 1)), NULL_RTX,
+ NULL_RTX, 1, 0);
+
+ *cost = COSTS_N_INSNS (insns);
+ if (speed_p)
+ *cost += insns * extra_cost->alu.logical;
+ *cost += rtx_cost (op0, code, 0, speed_p);
+ return true;
+ }
+
+ if (speed_p)
+ *cost += extra_cost->alu.logical;
+ *cost += (rtx_cost (op0, code, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), code, 1, speed_p));
+ return true;
+ }
+
+ if (mode == DImode)
+ {
+ rtx op0 = XEXP (x, 0);
+ enum rtx_code subcode = GET_CODE (op0);
+
+ *cost = COSTS_N_INSNS (2);
+
+ if (subcode == NOT
+ && (code == AND
+ || (code == IOR && TARGET_THUMB2)))
+ op0 = XEXP (op0, 0);
+
+ if (GET_CODE (op0) == ZERO_EXTEND)
+ {
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.logical;
+
+ *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), code, 0, speed_p));
+ return true;
+ }
+ else if (GET_CODE (op0) == SIGN_EXTEND)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
+
+ *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), code, 0, speed_p));
+ return true;
+ }
+
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.logical;
+
+ return true;
+ }
+ /* Vector mode? */
+
+ *cost = LIBCALL_COST (2);
+ return false;
+
+ case MULT:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ rtx op0 = XEXP (x, 0);
+
+ *cost = COSTS_N_INSNS (1);
+
+ if (GET_CODE (op0) == NEG)
+ op0 = XEXP (op0, 0);
+
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].mult;
+
+ *cost += (rtx_cost (op0, MULT, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
+ return true;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ *cost = LIBCALL_COST (2);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (TARGET_DSP_MULTIPLY
+ && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+ && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
+ || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+ && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
+ || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
+ && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
+ || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+ && (INTVAL (XEXP (XEXP (x, 1), 1))
+ == 16))))))
+ {
+ /* SMUL[TB][TB]. */
+ if (speed_p)
+ *cost += extra_cost->mult[0].extend;
+ *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
+ return true;
+ }
+ if (speed_p)
+ *cost += extra_cost->mult[0].simple;
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ if (arm_arch3m
+ && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->mult[1].extend;
+ *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
+ ZERO_EXTEND, 0, speed_p)
+ + rtx_cost (XEXP (XEXP (x, 1), 0),
+ ZERO_EXTEND, 0, speed_p));
+ return true;
+ }
+
+ *cost = LIBCALL_COST (2);
+ return false;
+ }
+
+ /* Vector mode? */
+ *cost = LIBCALL_COST (2);
+ return false;
+
+ case NEG:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].neg;
+
+ return false;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ *cost = LIBCALL_COST (1);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ if (GET_CODE (XEXP (x, 0)) == ABS)
+ {
+ *cost = COSTS_N_INSNS (2);
+ /* Assume the non-flag-changing variant. */
+ if (speed_p)
+ *cost += (extra_cost->alu.log_shift
+ + extra_cost->alu.arith_shift);
+ *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
+ return true;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
+ {
+ *cost = COSTS_N_INSNS (2);
+ /* No extra cost for MOV imm and MVN imm. */
+ /* If the comparison op is using the flags, there's no further
+ cost, otherwise we need to add the cost of the comparison. */
+ if (!(REG_P (XEXP (XEXP (x, 0), 0))
+ && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
+ && XEXP (XEXP (x, 0), 1) == const0_rtx))
+ {
+ *cost += (COSTS_N_INSNS (1)
+ + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
+ speed_p)
+ + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
+ speed_p));
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+ }
+ return true;
+ }
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < 4)
+ {
+ /* Slightly disparage, as we might need an extend operation. */
+ *cost = 1 + COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+ return false;
+ }
+
+ if (mode == DImode)
+ {
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.arith;
+ return false;
+ }
+
+ /* Vector mode? */
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case NOT:
+ if (mode == SImode)
+ {
+ rtx shift_op;
+ rtx shift_reg = NULL;
+
+ *cost = COSTS_N_INSNS (1);
+ shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
+
+ if (shift_op)
+ {
+ if (shift_reg != NULL)
+ {
+ if (speed_p)
+ *cost += extra_cost->alu.log_shift_reg;
+ *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
+ }
+ else if (speed_p)
+ *cost += extra_cost->alu.log_shift;
+ *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
+ return true;
+ }
+
+ if (speed_p)
+ *cost += extra_cost->alu.logical;
+ return false;
+ }
+ if (mode == DImode)
+ {
+ *cost = COSTS_N_INSNS (2);
+ return false;
+ }
+
+ /* Vector mode? */
+
+ *cost += LIBCALL_COST (1);
+ return false;
+
+ case IF_THEN_ELSE:
+ {
+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
+ {
+ *cost = COSTS_N_INSNS (4);
+ return true;
+ }
+ int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
+ int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
+
+ *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
+ /* Assume that if one arm of the if_then_else is a register,
+ that it will be tied with the result and eliminate the
+ conditional insn. */
+ if (REG_P (XEXP (x, 1)))
+ *cost += op2cost;
+ else if (REG_P (XEXP (x, 2)))
+ *cost += op1cost;
+ else
+ {
+ if (speed_p)
+ {
+ if (extra_cost->alu.non_exec_costs_exec)
+ *cost += op1cost + op2cost + extra_cost->alu.non_exec;
+ else
+ *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
+ }
+ else
+ *cost += op1cost + op2cost;
+ }
+ }
+ return true;
+
+ case COMPARE:
+ if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
+ *cost = 0;
+ else
+ {
+ enum machine_mode op0mode;
+ /* We'll mostly assume that the cost of a compare is the cost of the
+ LHS. However, there are some notable exceptions. */
+
+ /* Floating point compares are never done as side-effects. */
+ op0mode = GET_MODE (XEXP (x, 0));
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
+ && (op0mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[op0mode != SFmode].compare;
+
+ if (XEXP (x, 1) == CONST0_RTX (op0mode))
+ {
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+
+ return false;
+ }
+ else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
+ {
+ *cost = LIBCALL_COST (2);
+ return false;
+ }
+
+ /* DImode compares normally take two insns. */
+ if (op0mode == DImode)
+ {
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.arith;
+ return false;
+ }
+
+ if (op0mode == SImode)
+ {
+ rtx shift_op;
+ rtx shift_reg;
+
+ if (XEXP (x, 1) == const0_rtx
+ && !(REG_P (XEXP (x, 0))
+ || (GET_CODE (XEXP (x, 0)) == SUBREG
+ && REG_P (SUBREG_REG (XEXP (x, 0))))))
+ {
+ *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
+
+ /* Multiply operations that set the flags are often
+ significantly more expensive. */
+ if (speed_p
+ && GET_CODE (XEXP (x, 0)) == MULT
+ && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
+ *cost += extra_cost->mult[0].flag_setting;
+
+ if (speed_p
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
+ 0), 1), mode))
+ *cost += extra_cost->mult[0].flag_setting;
+ return true;
+ }
+
+ shift_reg = NULL;
+ shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
+ if (shift_op != NULL)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (shift_reg != NULL)
+ {
+ *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
+ if (speed_p)
+ *cost += extra_cost->alu.arith_shift_reg;
+ }
+ else if (speed_p)
+ *cost += extra_cost->alu.arith_shift;
+ *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
+ return true;
+ }
+
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+ if (CONST_INT_P (XEXP (x, 1))
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
+ {
+ *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
+ return true;
+ }
+ return false;
+ }
+
+ /* Vector mode? */
+
+ *cost = LIBCALL_COST (2);
+ return false;
+ }
+ return true;
+
+ case EQ:
+ case NE:
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ case LTU:
+ case LEU:
+ case GEU:
+ case GTU:
+ case ORDERED:
+ case UNORDERED:
+ case UNEQ:
+ case UNLE:
+ case UNLT:
+ case UNGE:
+ case UNGT:
+ case LTGT:
+ if (outer_code == SET)
+ {
+ /* Is it a store-flag operation? */
+ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
+ && XEXP (x, 1) == const0_rtx)
+ {
+ /* Thumb also needs an IT insn. */
+ *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
+ return true;
+ }
+ if (XEXP (x, 1) == const0_rtx)
+ {
+ switch (code)
+ {
+ case LT:
+ /* LSR Rd, Rn, #31. */
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.shift;
+ break;
+
+ case EQ:
+ /* RSBS T1, Rn, #0
+ ADC Rd, Rn, T1. */
+
+ case NE:
+ /* SUBS T1, Rn, #1
+ SBC Rd, Rn, T1. */
+ *cost = COSTS_N_INSNS (2);
+ break;
+
+ case LE:
+ /* RSBS T1, Rn, Rn, LSR #31
+ ADC Rd, Rn, T1. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += extra_cost->alu.arith_shift;
+ break;
+
+ case GT:
+ /* RSB Rd, Rn, Rn, ASR #1
+ LSR Rd, Rd, #31. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += (extra_cost->alu.arith_shift
+ + extra_cost->alu.shift);
+ break;
+
+ case GE:
+ /* ASR Rd, Rn, #31
+ ADD Rd, Rn, #1. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += extra_cost->alu.shift;
+ break;
+
+ default:
+ /* Remaining cases are either meaningless or would take
+ three insns anyway. */
+ *cost = COSTS_N_INSNS (3);
+ break;
+ }
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+ else
+ {
+ *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
+ if (CONST_INT_P (XEXP (x, 1))
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
+ {
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+
+ return false;
+ }
+ }
+ /* Not directly inside a set. If it involves the condition code
+ register it must be the condition for a branch, cond_exec or
+ I_T_E operation. Since the comparison is performed elsewhere
+ this is just the control part which has no additional
+ cost. */
+ else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
+ && XEXP (x, 1) == const0_rtx)
+ {
+ *cost = 0;
+ return true;
+ }
+
+ case ABS:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode != SFmode].neg;
+
+ return false;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ *cost = LIBCALL_COST (1);
+ return false;
+ }
+
+ if (mode == SImode)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
+ return false;
+ }
+ /* Vector mode? */
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case SIGN_EXTEND:
+ if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
+ && MEM_P (XEXP (x, 0)))
+ {
+ *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
+
+ if (mode == DImode)
+ *cost += COSTS_N_INSNS (1);
+
+ if (!speed_p)
+ return true;
+
+ if (GET_MODE (XEXP (x, 0)) == SImode)
+ *cost += extra_cost->ldst.load;
+ else
+ *cost += extra_cost->ldst.load_sign_extend;
+
+ if (mode == DImode)
+ *cost += extra_cost->alu.shift;
+
+ return true;
+ }
+
+ /* Widening from less than 32-bits requires an extend operation. */
+ if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
+ {
+ /* We have SXTB/SXTH. */
+ *cost = COSTS_N_INSNS (1);
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ *cost += extra_cost->alu.extnd;
+ }
+ else if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ /* Needs two shifts. */
+ *cost = COSTS_N_INSNS (2);
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.shift;
+ }
+
+ /* Widening beyond 32-bits requires one more insn. */
+ if (mode == DImode)
+ {
+ *cost += COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.shift;
+ }
+
+ return true;
+
+ case ZERO_EXTEND:
+ if ((arm_arch4
+ || GET_MODE (XEXP (x, 0)) == SImode
+ || GET_MODE (XEXP (x, 0)) == QImode)
+ && MEM_P (XEXP (x, 0)))
+ {
+ *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
+
+ if (mode == DImode)
+ *cost += COSTS_N_INSNS (1); /* No speed penalty. */
+
+ return true;
+ }
+
+ /* Widening from less than 32-bits requires an extend operation. */
+ if (GET_MODE (XEXP (x, 0)) == QImode)
+ {
+ /* UXTB can be a shorter instruction in Thumb2, but it might
+ be slower than the AND Rd, Rn, #255 alternative. When
+ optimizing for speed it should never be slower to use
+ AND, and we don't really model 16-bit vs 32-bit insns
+ here. */
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.logical;
+ }
+ else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
+ {
+ /* We have UXTB/UXTH. */
+ *cost = COSTS_N_INSNS (1);
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ *cost += extra_cost->alu.extnd;
+ }
+ else if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ /* Needs two shifts. It's marginally preferable to use
+ shifts rather than two BIC instructions as the second
+ shift may merge with a subsequent insn as a shifter
+ op. */
+ *cost = COSTS_N_INSNS (2);
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.shift;
+ }
+
+ /* Widening beyond 32-bits requires one more insn. */
+ if (mode == DImode)
+ {
+ *cost += COSTS_N_INSNS (1); /* No speed penalty. */
+ }
+
+ return true;
+
+ case CONST_INT:
+ *cost = 0;
+ /* CONST_INT has no mode, so we cannot tell for sure how many
+ insns are really going to be needed. The best we can do is
+ look at the value passed. If it fits in SImode, then assume
+ that's the mode it will be used for. Otherwise assume it
+ will be used in DImode. */
+ if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
+ mode = SImode;
+ else
+ mode = DImode;
+
+ /* Avoid blowing up in arm_gen_constant (). */
+ if (!(outer_code == PLUS
+ || outer_code == AND
+ || outer_code == IOR
+ || outer_code == XOR
+ || outer_code == MINUS))
+ outer_code = SET;
+
+ const_int_cost:
+ if (mode == SImode)
+ {
+ *cost += 0;
+ *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
+ INTVAL (x), NULL, NULL,
+ 0, 0));
+ /* Extra costs? */
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS (arm_gen_constant
+ (outer_code, SImode, NULL,
+ trunc_int_for_mode (INTVAL (x), SImode),
+ NULL, NULL, 0, 0)
+ + arm_gen_constant (outer_code, SImode, NULL,
+ INTVAL (x) >> 32, NULL,
+ NULL, 0, 0));
+ /* Extra costs? */
+ }
+
+ return true;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (speed_p)
+ {
+ if (arm_arch_thumb2 && !flag_pic)
+ *cost = COSTS_N_INSNS (2);
+ else
+ *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
+ }
+ else
+ *cost = COSTS_N_INSNS (2);
+
+ if (flag_pic)
+ {
+ *cost += COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.arith;
+ }
+
+ return true;
+
+ case CONST_FIXED:
+ *cost = COSTS_N_INSNS (4);
+ /* Fixme. */
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
+ && (mode == SFmode || !TARGET_VFP_SINGLE))
+ {
+ if (vfp3_const_double_rtx (x))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode == DFmode].fpconst;
+ return true;
+ }
+
+ if (speed_p)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (mode == DFmode)
+ *cost += extra_cost->ldst.loadd;
+ else
+ *cost += extra_cost->ldst.loadf;
+ }
+ else
+ *cost = COSTS_N_INSNS (2 + (mode == DFmode));
+
+ return true;
+ }
+ *cost = COSTS_N_INSNS (4);
+ return true;
+
+ case CONST_VECTOR:
+ /* Fixme. */
+ if (TARGET_NEON
+ && TARGET_HARD_FLOAT
+ && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
+ && neon_immediate_valid_for_move (x, mode, NULL, NULL))
+ *cost = COSTS_N_INSNS (1);
+ else
+ *cost = COSTS_N_INSNS (4);
+ return true;
+
+ case HIGH:
+ case LO_SUM:
+ *cost = COSTS_N_INSNS (1);
+ /* When optimizing for size, we prefer constant pool entries to
+ MOVW/MOVT pairs, so bump the cost of these slightly. */
+ if (!speed_p)
+ *cost += 1;
+ return true;
+
+ case CLZ:
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.clz;
+ return false;
+
+ case SMIN:
+ if (XEXP (x, 1) == const0_rtx)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.log_shift;
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+ /* Fall through. */
+ case SMAX:
+ case UMIN:
+ case UMAX:
+ *cost = COSTS_N_INSNS (2);
+ return false;
+
+ case TRUNCATE:
+ if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+ && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ == ZERO_EXTEND))))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->mult[1].extend;
+ *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
+ speed_p)
+ + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
+ 0, speed_p));
+ return true;
+ }
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case UNSPEC:
+ return arm_unspec_cost (x, outer_code, speed_p, cost);
+
+ case PC:
+ /* Reading the PC is like reading any other register. Writing it
+ is more expensive, but we take that into account elsewhere. */
+ *cost = 0;
+ return true;
+
+ case ZERO_EXTRACT:
+ /* TODO: Simple zero_extract of bottom bits using AND. */
+ /* Fall through. */
+ case SIGN_EXTRACT:
+ if (arm_arch6
+ && mode == SImode
+ && CONST_INT_P (XEXP (x, 1))
+ && CONST_INT_P (XEXP (x, 2)))
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->alu.bfx;
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+ /* Without UBFX/SBFX, need to resort to shift operations. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += 2 * extra_cost->alu.shift;
+ *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
+ return true;
+
+ case FLOAT_EXTEND:
+ if (TARGET_HARD_FLOAT)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode == DFmode].widen;
+ if (!TARGET_FPU_ARMV8
+ && GET_MODE (XEXP (x, 0)) == HFmode)
+ {
+ /* Pre v8, widening HF->DF is a two-step process, first
+ widening to SFmode. */
+ *cost += COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[0].widen;
+ }
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ }
+
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case FLOAT_TRUNCATE:
+ if (TARGET_HARD_FLOAT)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode == DFmode].narrow;
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ return true;
+ /* Vector modes? */
+ }
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (TARGET_HARD_FLOAT)
+ {
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
+ /* Strip off the 'cost' of rounding towards zero. */
+ if (GET_CODE (XEXP (x, 0)) == FIX)
+ *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
+ else
+ *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
+ /* ??? Increase the cost to deal with transferring from
+ FP -> CORE registers? */
+ return true;
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ && TARGET_FPU_ARMV8)
+ {
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode == DFmode].roundint;
+ return false;
+ }
+ /* Vector costs? */
+ }
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (TARGET_HARD_FLOAT)
+ {
+ /* ??? Increase the cost to deal with transferring from CORE
+ -> FP registers? */
+ *cost = COSTS_N_INSNS (1);
+ if (speed_p)
+ *cost += extra_cost->fp[mode == DFmode].fromint;
+ return false;
+ }
+ *cost = LIBCALL_COST (1);
+ return false;
+
+ case CALL:
+ *cost = COSTS_N_INSNS (1);
+ return true;
+
+ case ASM_OPERANDS:
+ /* Just a guess. Cost one insn per input. */
+ *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
+ return true;
+
+ default:
+ if (mode != VOIDmode)
+ *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ else
+ *cost = COSTS_N_INSNS (4); /* Who knows? */
+ return false;
+ }
+}
+
/* RTX costs when optimizing for size. */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
int *total, bool speed)
{
- if (!speed)
- return arm_size_rtx_costs (x, (enum rtx_code) code,
- (enum rtx_code) outer_code, total);
+ bool result;
+
+ if (TARGET_OLD_RTX_COSTS
+ || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
+ {
+ /* Old way. (Deprecated.) */
+ if (!speed)
+ result = arm_size_rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code, total);
+ else
+ result = current_tune->rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code, total,
+ speed);
+ }
else
- return current_tune->rtx_costs (x, (enum rtx_code) code,
+ {
+ /* New way. */
+ if (current_tune->insn_extra_cost)
+ result = arm_new_rtx_costs (x, (enum rtx_code) code,
+ (enum rtx_code) outer_code,
+ current_tune->insn_extra_cost,
+ total, speed);
+ /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
+ && current_tune->insn_extra_cost == NULL */
+ else
+ result = arm_new_rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code,
- total, speed);
+ &generic_extra_costs, total, speed);
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_rtl_single (dump_file, x);
+ fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
+ *total, result ? "final" : "partial");
+ }
+ return result;
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index b9ae2b0..66e128e 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -243,6 +243,14 @@ mrestrict-it
Target Report Var(arm_restrict_it) Init(2)
Generate IT blocks appropriate for ARMv8.
+mold-rtx-costs
+Target Report Mask(OLD_RTX_COSTS)
+Use the old RTX costing tables (transitional).
+
+mnew-generic-costs
+Target Report Mask(NEW_GENERIC_COSTS)
+Use the new generic RTX cost tables if a core-specific cost table is not available (transitional).
+
mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions