-rw-r--r--  gcc/ChangeLog          12
-rw-r--r--  gcc/config/arm/arm.c  827
2 files changed, 595 insertions, 244 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 84354df..7bb4789 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2009-01-13  Richard Earnshaw  <rearnsha@arm.com>
+
+ * arm.c (struct processors): Pass flag for speed down into cost
+ helper functions.
+ (const_ok_for_op): Handle COMPARE and inequality nodes.
+ (arm_rtx_costs_1): Rewrite.
+ (arm_size_rtx_costs): Update prototype.
+ (arm_rtx_costs): Pass speed down to helper functions.
+ (arm_slowmul_rtx_costs): Rework cost calculations.
+ (arm_fastmul_rtx_costs, arm_xscale_rtx_costs): Likewise.
+ (arm_9e_rtx_costs): Likewise.
+
2009-01-13  Uros Bizjak  <ubizjak@gmail.com>

 * config/alpha/alpha.c (alpha_legitimate_address_p): Explicit
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index dce0a52..8c2aa5e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -126,12 +126,12 @@ static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
-static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
-static bool arm_size_rtx_costs (rtx, int, int, int *);
-static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
-static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
-static bool arm_xscale_rtx_costs (rtx, int, int, int *);
-static bool arm_9e_rtx_costs (rtx, int, int, int *);
+static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
+static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
+static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
+static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
@@ -619,7 +619,7 @@ struct processors
enum processor_type core;
const char *arch;
const unsigned long flags;
- bool (* rtx_costs) (rtx, int, int, int *);
+ bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};
/* Not all of these give usefully different compilation alternatives,
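Note on the new hook signature: it now matches GCC's TARGET_RTX_COSTS
contract, in which a true return means *total already holds the final cost,
while a false return asks the generic rtx_cost code to add the costs of the
operands on top.  A minimal sketch of how a caller consumes the hook
(paraphrased, not the actual rtx_cost implementation):

  static int
  rtx_cost_sketch (rtx x, enum rtx_code outer, bool speed)
  {
    int total = 0;
    if (all_cores[(int) arm_tune].rtx_costs (x, GET_CODE (x), outer,
                                             &total, speed))
      return total;	/* The hook produced a final cost.  */
    /* On a false return, the generic code also sums the costs of
       X's operands into TOTAL before using it.  */
    return total;
  }

This is why so many of the changes below flip "return true" to "return
false": returning false lets the generic code cost the operands, replacing
the old style of open-coded REG_OR_SUBREG_REG checks.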
@@ -1883,6 +1883,24 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
case PLUS:
+ case COMPARE:
+ case EQ:
+ case NE:
+ case GT:
+ case LE:
+ case LT:
+ case GE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
case MINUS: /* Should only occur with (MINUS I reg) => rsb */
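Note: for COMPARE and the inequality codes the constant is tested in
negated form because a comparison against I can instead be done as a
comparison against -I (CMP vs. CMN, picking whichever immediate encodes).
For reference, an ARM data-processing immediate is an 8-bit value rotated
right by an even amount; a stand-alone check in the spirit of
const_ok_for_arm (illustrative only, assumes 32-bit unsigned int) could be:

  static int
  arm_immediate_ok_p (unsigned int i)
  {
    int r;
    for (r = 0; r < 32; r += 2)
      {
        /* Rotate I left by R bits; if the result fits in the 8-bit
           immediate field, some rotation of an 8-bit value forms I.  */
        unsigned int rotated = (i << r) | (r ? i >> (32 - r) : 0);
        if (rotated <= 0xff)
          return 1;
      }
    return 0;
  }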
@@ -4882,130 +4900,227 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
}
}
-
-/* Worker routine for arm_rtx_costs. */
-/* ??? This needs updating for thumb2. */
-static inline int
-arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
+static inline bool
+arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
+ rtx operand;
+ enum rtx_code code = GET_CODE (x);
int extra_cost;
+ *total = 0;
switch (code)
{
case MEM:
/* Memory costs quite a lot for the first word, but subsequent words
load at the equivalent of a single insn each. */
- return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
- + (GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
+ *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
+ return true;
case DIV:
case MOD:
case UDIV:
case UMOD:
- return optimize_size ? COSTS_N_INSNS (2) : 100;
+ if (TARGET_HARD_FLOAT && mode == SFmode)
+ *total = COSTS_N_INSNS (2);
+ else if (TARGET_HARD_FLOAT && mode == DFmode)
+ *total = COSTS_N_INSNS (4);
+ else
+ *total = COSTS_N_INSNS (20);
+ return false;
case ROTATE:
- if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
- return 4;
+ if (GET_CODE (XEXP (x, 1)) == REG)
+ *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = rtx_cost (XEXP (x, 1), code, speed);
+
/* Fall through */
case ROTATERT:
if (mode != SImode)
- return 8;
+ {
+ *total += COSTS_N_INSNS (4);
+ return true;
+ }
+
/* Fall through */
case ASHIFT: case LSHIFTRT: case ASHIFTRT:
+ *total += rtx_cost (XEXP (x, 0), code, speed);
if (mode == DImode)
- return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
- + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 8));
+ {
+ *total += COSTS_N_INSNS (3);
+ return true;
+ }
- extra_cost = 1;
+ *total += COSTS_N_INSNS (1);
/* Increase the cost of complex shifts because they aren't any faster,
and reduce dual issue opportunities. */
if (arm_tune_cortex_a9
&& outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
- extra_cost++;
-
- return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG
- || (GET_CODE (XEXP (x, 0)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
- ? 0 : 4)
- + ((GET_CODE (XEXP (x, 1)) == REG
- || (GET_CODE (XEXP (x, 1)) == SUBREG
- && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
- || (GET_CODE (XEXP (x, 1)) == CONST_INT))
- ? 0 : 4));
+ ++*total;
+
+ return true;
case MINUS:
- if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
+ if (TARGET_THUMB2)
{
- extra_cost = rtx_cost (XEXP (x, 1), code, true);
- if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (20);
+ }
+ else
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ /* Thumb2 does not have RSB, so all arguments must be
+ registers (subtracting a constant is canonicalized as
+ addition of the negated constant). */
+ return false;
}
if (mode == DImode)
- return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
- ? 0 : 8));
+ {
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 0))
- || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 0))))
- ? 0 : 8));
-
- if (((GET_CODE (XEXP (x, 0)) == CONST_INT
- && const_ok_for_arm (INTVAL (XEXP (x, 0)))
- && REG_OR_SUBREG_REG (XEXP (x, 1))))
- || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
- || subcode == ASHIFTRT || subcode == LSHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
- (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
- && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
- || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
- && REG_OR_SUBREG_REG (XEXP (x, 0))))
- return 1;
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 0)))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT
+ && const_ok_for_arm (INTVAL (XEXP (x, 0))))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 1));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
+ && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
+ (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
+ return true;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+
+ return true;
+ }
+
/* Fall through */
case PLUS:
- if (arm_arch6 && mode == SImode
+ if (code == PLUS && arm_arch6 && mode == SImode
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
- return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
- + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
+ {
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
+ speed);
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
- if (GET_CODE (XEXP (x, 0)) == MULT)
+ /* MLA: All arguments must be registers. We filter out
+ multiplication by a power of two, so that we fall down into
+ the code below. */
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
+ (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
{
- extra_cost = rtx_cost (XEXP (x, 0), code, true);
- if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
- extra_cost += 4 * ARM_NUM_REGS (mode);
- return extra_cost;
+ /* The cost comes from the cost of the multiply. */
+ return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
- && arm_const_double_rtx (XEXP (x, 1))))
- ? 0 : 8));
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
+ && arm_const_double_rtx (XEXP (x, 1)))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ return false;
+ }
+
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+
+ if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
+ *total += COSTS_N_INSNS (1);
+ return true;
+ }
/* Fall through */
+
case AND: case XOR: case IOR:
extra_cost = 0;
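Note: the test "(INTVAL (op) & (INTVAL (op) - 1)) == 0" that recurs
throughout this function accepts exactly zero and the powers of two:
subtracting one clears the lowest set bit and sets all the bits below it,
so the AND is zero only when at most one bit is set.  A MULT by such a
constant is really a shift, which ARM folds into the shifted-operand form
of the surrounding instruction at no extra cost.  Stand-alone form of the
idiom (illustrative):

  static int
  power_of_two_or_zero_p (unsigned HOST_WIDE_INT n)
  {
    /* Matches the patch: zero is accepted along with 1, 2, 4, ...  */
    return (n & (n - 1)) == 0;
  }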
@@ -5019,38 +5134,56 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
&& GET_CODE (XEXP (x, 1)) != CONST_INT)
|| (REG_OR_SUBREG_REG (XEXP (x, 0))
&& ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
- extra_cost = 4;
+ *total = 4;
if (mode == DImode)
- return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 8));
-
- if (REG_OR_SUBREG_REG (XEXP (x, 0)))
- return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
- + ((REG_OR_SUBREG_REG (XEXP (x, 1))
- || (GET_CODE (XEXP (x, 1)) == CONST_INT
- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
- ? 0 : 4));
-
- else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
- return (1 + extra_cost
- + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
- || subcode == LSHIFTRT || subcode == ASHIFTRT
- || subcode == ROTATE || subcode == ROTATERT
- || (subcode == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
- && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
- && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))
- && !arm_tune_cortex_a9)
- || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
- ? 0 : 4));
+ {
+ *total += COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
- return 8;
+ return false;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
+ (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == UMIN || subcode == UMAX
+ || subcode == SMIN || subcode == SMAX)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
+
+ return false;
case MULT:
/* This should have been handled by the CPU specific routines. */
@@ -5064,108 +5197,290 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
== GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
- return 8;
- return 99;
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
+ return false;
case NEG:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (2);
+ return false;
+ }
+
/* Fall through */
case NOT:
- if (mode == DImode)
- return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
+ *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
+ if (mode == SImode && code == NOT)
+ {
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT
+ || (subcode == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
+ (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
+ {
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ /* Register shifts cost an extra cycle. */
+ if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
+ *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
+ subcode, speed);
+ return true;
+ }
+ }
- return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
+ return false;
case IF_THEN_ELSE:
if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
- return 14;
- return 2;
+ {
+ *total = COSTS_N_INSNS (4);
+ return true;
+ }
+
+ operand = XEXP (x, 0);
+
+ if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
+ && GET_CODE (XEXP (operand, 0)) == REG
+ && REGNO (XEXP (operand, 0)) == CC_REGNUM))
+ *total += COSTS_N_INSNS (1);
+ *total += (rtx_cost (XEXP (x, 1), code, speed)
+ + rtx_cost (XEXP (x, 2), code, speed));
+ return true;
+
+ case NE:
+ if (mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case GE:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case LT:
+ if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
+ && mode == SImode && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+ goto scc_insn;
+
+ case EQ:
+ case GT:
+ case LE:
+ case GEU:
+ case LTU:
+ case GTU:
+ case LEU:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNLT:
+ case UNGT:
+ case UNLE:
+ scc_insn:
+ /* SCC insns. In the case where the comparison has already been
+ performed, then they cost 2 instructions. Otherwise they need
+ an additional comparison before them. */
+ *total = COSTS_N_INSNS (2);
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ return true;
+ }
+ /* Fall through */
case COMPARE:
- return 1;
+ if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
+ {
+ *total = 0;
+ return true;
+ }
+
+ *total += COSTS_N_INSNS (1);
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
+ {
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+ }
+
+ subcode = GET_CODE (XEXP (x, 0));
+ if (subcode == ASHIFT || subcode == ASHIFTRT
+ || subcode == LSHIFTRT
+ || subcode == ROTATE || subcode == ROTATERT)
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ if (subcode == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
+ (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
+ {
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
+ return true;
+ }
+
+ return false;
+
+ case UMIN:
+ case UMAX:
+ case SMIN:
+ case SMAX:
+ *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
+ *total += rtx_cost (XEXP (x, 1), code, speed);
+ return true;
case ABS:
- return 4 + (mode == DImode ? 4 : 0);
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ *total = COSTS_N_INSNS (20);
+ return false;
+ }
+ *total = COSTS_N_INSNS (1);
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (3);
+ return false;
case SIGN_EXTEND:
- if (arm_arch_thumb2 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ *total = 0;
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
+
+ if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ if (arm_arch6)
+ {
+ if (GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (1);
+ }
+ else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (2);
+ }
+
+ return false;
+ }
- if (GET_MODE (XEXP (x, 0)) == QImode)
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
/* Fall through */
case ZERO_EXTEND:
- if (arm_arch6 && mode == SImode)
- return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
-
- switch (GET_MODE (XEXP (x, 0)))
+ *total = 0;
+ if (GET_MODE_CLASS (mode) == MODE_INT)
{
- case QImode:
- return (1 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ if (mode == DImode)
+ *total += COSTS_N_INSNS (1);
- case HImode:
- return (4 + (mode == DImode ? 4 : 0)
- + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ if (GET_MODE (XEXP (x, 0)) != SImode)
+ {
+ if (arm_arch6)
+ {
+ if (GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (1);
+ }
+ else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
+ *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
+ 1 : 2);
+ }
- case SImode:
- return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
+ return false;
+ }
+ switch (GET_MODE (XEXP (x, 0)))
+ {
case V8QImode:
case V4HImode:
case V2SImode:
case V4QImode:
case V2HImode:
- return 1;
+ *total = COSTS_N_INSNS (1);
+ return false;
default:
gcc_unreachable ();
}
gcc_unreachable ();
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
+ return true;
+
case CONST_INT:
- if (const_ok_for_arm (INTVAL (x)))
- return outer == SET ? 2 : -1;
- else if (outer == AND
- && const_ok_for_arm (~INTVAL (x)))
- return -1;
- else if ((outer == COMPARE
- || outer == PLUS || outer == MINUS)
- && const_ok_for_arm (-INTVAL (x)))
- return -1;
+ if (const_ok_for_arm (INTVAL (x))
+ || const_ok_for_arm (~INTVAL (x)))
+ *total = COSTS_N_INSNS (1);
else
- return 5;
+ *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
+ INTVAL (x), NULL_RTX,
+ NULL_RTX, 0, 0));
+ return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
- return 6;
+ *total = COSTS_N_INSNS (3);
+ return true;
case HIGH:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
case LO_SUM:
- return (outer == SET) ? 1 : -1;
+ *total = COSTS_N_INSNS (1);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
+ return true;
case CONST_DOUBLE:
- if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
- return outer == SET ? 2 : -1;
- else if ((outer == COMPARE || outer == PLUS)
- && neg_const_double_rtx_ok_for_fpa (x))
- return -1;
- return 7;
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (4);
+ return true;
default:
- return 99;
+ *total = COSTS_N_INSNS (4);
+ return false;
}
}
/* RTX costs when optimizing for size. */
static bool
-arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total)
{
enum machine_mode mode = GET_MODE (x);
-
if (TARGET_THUMB1)
{
/* XXX TBD. For now, use the standard costs. */
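Note: the rewrite consistently expresses costs in COSTS_N_INSNS units
rather than the previous ad-hoc integers.  In rtl.h the macro is just a
scale factor:

  #define COSTS_N_INSNS(N) ((N) * 4)

so COSTS_N_INSNS (1) is the cost of one fast instruction, and the few raw
"4" values left in the file are the same unit.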
@@ -5395,19 +5710,22 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX costs.  Dispatch to the size- or speed-based implementation. */
static bool
-arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+arm_rtx_costs (rtx x, int code, int outer_code, int *total,
+ bool speed)
{
if (!speed)
return arm_size_rtx_costs (x, code, outer_code, total);
else
- return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
+ return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total,
+ speed);
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
static bool
-arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5423,8 +5741,8 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
@@ -5440,20 +5758,19 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
+ *total = COSTS_N_INSNS (cost);
+ *total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
- *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
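Note: the Booth loop above models a multiplier that retires
booth_unit_size bits of the constant operand per cycle and terminates
early once the remaining bits are all zero; the change from "cost += 2" to
"cost++" switches the count to whole-instruction units now that the result
is wrapped in COSTS_N_INSNS.  A stand-alone version of the cycle count
(illustrative, mirroring the loop in the patch):

  static int
  booth_mul_cycles (unsigned HOST_WIDE_INT i, int booth_unit_size)
  {
    int cost = 0, j;
    for (j = 0; i && j < 32; j += booth_unit_size)
      {
        i >>= booth_unit_size;
        cost++;
      }
    return cost;
  }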
@@ -5461,7 +5778,8 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX cost for cores with a fast multiply unit (M variants). */
static bool
-arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5482,16 +5800,15 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
@@ -5507,20 +5824,34 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
- cost += 2;
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS (cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (4);
+ return false;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
+
+ /* Requires a lib call */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
@@ -5529,7 +5860,7 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
so it can be ignored. */
static bool
-arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5541,6 +5872,15 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
switch (code)
{
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) != MULT)
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
+
+ /* A COMPARE of a MULT is slow on XScale; the muls instruction
+ will stall until the multiplication is complete. */
+ *total = COSTS_N_INSNS (3);
+ return false;
+
case MULT:
/* There is no point basing this on the tuning, since it is always the
fast variant if it exists at all. */
@@ -5549,60 +5889,58 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 8;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT
- || mode == DImode)
+ if (mode == DImode)
{
- *total = 30;
- return true;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
- unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
- & (unsigned HOST_WIDE_INT) 0xffffffff);
- int cost, const_ok = const_ok_for_arm (i);
+ /* If operand 1 is a constant we can more accurately
+ calculate the cost of the multiply. The multiplier can
+ retire 15 bits on the first cycle and a further 12 on the
+ second. We do, of course, have to load the constant into
+ a register first. */
+ unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
+ /* There's a general overhead of one cycle. */
+ int cost = 1;
unsigned HOST_WIDE_INT masked_const;
- /* The cost will be related to two insns.
- First a load of the constant (MOV or LDR), then a multiply. */
- cost = 2;
- if (! const_ok)
- cost += 1; /* LDR is probably more expensive because
- of longer result latency. */
+ if (i & 0x80000000)
+ i = ~i;
+
+ i &= (unsigned HOST_WIDE_INT) 0xffffffff;
+
masked_const = i & 0xffff8000;
- if (masked_const != 0 && masked_const != 0xffff8000)
+ if (masked_const != 0)
{
+ cost++;
masked_const = i & 0xf8000000;
- if (masked_const == 0 || masked_const == 0xf8000000)
- cost += 1;
- else
- cost += 2;
+ if (masked_const != 0)
+ cost++;
}
- *total = cost;
- return true;
+ *total = COSTS_N_INSNS (cost);
+ return false;
}
- *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
- return true;
+ if (mode == SImode)
+ {
+ *total = COSTS_N_INSNS (3);
+ return false;
+ }
- case COMPARE:
- /* A COMPARE of a MULT is slow on XScale; the muls instruction
- will stall until the multiplication is complete. */
- if (GET_CODE (XEXP (x, 0)) == MULT)
- *total = 4 + rtx_cost (XEXP (x, 0), code, true);
- else
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ /* Requires a lib call */
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
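Note: the XScale constant-multiply model above implements the comment in
the patch: one cycle of general overhead, 15 multiplier bits retiring in
the first cycle and a further 12 in the second, with negative constants
complemented first so that sign bits do not defeat early termination.  The
masks select what remains beyond each window (0xffff8000 is everything
above bit 14, 0xf8000000 everything above bit 26).  Stand-alone version
(illustrative):

  static int
  xscale_mul_cycles (unsigned HOST_WIDE_INT i)
  {
    int cost = 1;			/* General one-cycle overhead.  */
    if (i & 0x80000000)
      i = ~i;				/* Fold negative constants.  */
    i &= (unsigned HOST_WIDE_INT) 0xffffffff;
    if ((i & 0xffff8000) != 0)		/* Bits above 14 remain.  */
      {
        cost++;
        if ((i & 0xf8000000) != 0)	/* Bits above 26 remain.  */
          cost++;
      }
    return cost;
  }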
@@ -5610,11 +5948,10 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX costs for 9e (and later) cores. */
static bool
-arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
+arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
+ int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
- int nonreg_cost;
- int cost;
if (TARGET_THUMB1)
{
@@ -5640,35 +5977,37 @@ arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
- *total = 3;
- return true;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
- if (GET_MODE_CLASS (mode) == MODE_FLOAT)
- {
- *total = 30;
- return true;
- }
if (mode == DImode)
{
- cost = 7;
- nonreg_cost = 8;
+ *total = COSTS_N_INSNS (5);
+ return false;
}
- else
+
+ if (mode == SImode)
{
- cost = 2;
- nonreg_cost = 4;
+ *total = COSTS_N_INSNS (2);
+ return false;
}
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
+ {
+ *total = COSTS_N_INSNS (1);
+ return false;
+ }
+ }
- *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
- + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
- return true;
+ *total = COSTS_N_INSNS (20);
+ return false;
default:
- *total = arm_rtx_costs_1 (x, code, outer_code);
- return true;
+ return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* All address computations that can be done are free, but rtx cost returns