diff options
| -rw-r--r-- | gcc/config/msp430/msp430.c | 502 |
1 files changed, 485 insertions, 17 deletions
diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c index 580e87f..9f76351 100644 --- a/gcc/config/msp430/msp430.c +++ b/gcc/config/msp430/msp430.c @@ -49,6 +49,9 @@ #include "msp430-devices.h" #include "incpath.h" #include "prefix.h" +#include "insn-config.h" +#include "insn-attr.h" +#include "recog.h" /* This file should be included last. */ #include "target-def.h" @@ -56,6 +59,7 @@ static void msp430_compute_frame_info (void); static bool use_32bit_hwmult (void); +static bool use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt); @@ -1118,6 +1122,28 @@ static const struct double_op_cost size_cost_double_op = 2, 2, 3 }; +struct msp430_multlib_costs +{ + const int mulhi; + const int mulsi; + const int muldi; +}; + +/* There is no precise size cost when using libcalls, instead it is disparaged + relative to other instructions. + The cycle costs are from the CALL to the RET, inclusive. + FIXME muldi cost is not accurate. */ +static const struct msp430_multlib_costs cycle_cost_multlib_32bit = +{ + 27, 33, 66 +}; + +/* 32bit multiply takes a few more instructions on 16bit hwmult. */ +static const struct msp430_multlib_costs cycle_cost_multlib_16bit = +{ + 27, 42, 66 +}; + /* TARGET_REGISTER_MOVE_COST There is only one class of general-purpose, non-fixed registers, and the relative cost of moving data between them is always the same. @@ -1160,29 +1186,469 @@ msp430_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, because there are no conditional move insns - when a condition is involved, the only option is to use a cbranch. */ -#undef TARGET_RTX_COSTS -#define TARGET_RTX_COSTS msp430_rtx_costs +/* For X, which must be a MEM RTX, return TRUE if it is an indirect memory + reference, @Rn or @Rn+. */ +static bool +msp430_is_mem_indirect (rtx x) +{ + gcc_assert (GET_CODE (x) == MEM); + rtx op0 = XEXP (x, 0); + return (GET_CODE (op0) == REG || GET_CODE (op0) == POST_INC); +} + +/* Costs of MSP430 instructions are generally based on the addressing mode + combination of the source and destination operands. + Given source operand SRC (which may be NULL to indicate a single-operand + instruction) and destination operand DST return the cost of this + expression. */ +static int +msp430_costs (rtx src, rtx dst, bool speed, rtx outer_rtx) +{ + enum rtx_code src_code = GET_CODE (src); + enum rtx_code dst_code = GET_CODE (dst); + enum rtx_code outer_code = GET_CODE (outer_rtx); + machine_mode outer_mode = GET_MODE (outer_rtx); + const struct double_op_cost *cost_p; + cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op); + + if (outer_code == TRUNCATE + && (outer_mode == QImode + || outer_mode == HImode + || outer_mode == PSImode)) + /* Truncation to these modes is normally free as a side effect of the + instructions themselves. */ + return 0; + + if (dst_code == SYMBOL_REF + || dst_code == LABEL_REF + || dst_code == CONST_INT) + /* Catch RTX like (minus (const_int 0) (reg)) but don't add any cost. */ + return 0; + + switch (src_code) + { + case REG: + return (dst_code == REG ? cost_p->r2r + : (dst_code == PC ? cost_p->r2pc : cost_p->r2m)); + + case CONST_INT: + case SYMBOL_REF: + case LABEL_REF: + case CONST: + return (dst_code == REG ? cost_p->imm2r + : (dst_code == PC ? cost_p->imm2pc : cost_p->imm2m)); + + + case MEM: + if (msp430_is_mem_indirect (src)) + return (dst_code == REG ? cost_p->ind2r : (dst_code == PC + ? cost_p->ind2pc + : cost_p->ind2m)); + else + return (dst_code == REG ? cost_p->mem2r : (dst_code == PC + ? cost_p->mem2pc + : cost_p->mem2m)); + default: + return cost_p->mem2m; + } +} + +/* Given source operand SRC and destination operand DST from the shift or + rotate RTX OUTER_RTX, return the cost of performing that shift, assuming + optimization for speed when SPEED is true. */ +static int +msp430_shift_costs (rtx src, rtx dst, bool speed, rtx outer_rtx) +{ + int amt; + enum rtx_code src_code = GET_CODE (src); + enum rtx_code dst_code = GET_CODE (dst); + const struct single_op_cost *cost_p; + + cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op); + + if (src_code != CONST_INT) + /* The size or speed cost when the shift amount is unknown cannot be + accurately calculated, so just disparage it slightly. */ + return 2 * msp430_costs (src, dst, speed, outer_rtx); -static bool msp430_rtx_costs (rtx x ATTRIBUTE_UNUSED, - machine_mode mode, - int outer_code ATTRIBUTE_UNUSED, - int opno ATTRIBUTE_UNUSED, - int * total, - bool speed ATTRIBUTE_UNUSED) + if (use_helper_for_const_shift (GET_MODE (outer_rtx), amt = INTVAL (src))) + { + /* GCC sometimes tries to perform shifts in some very inventive ways, + resulting in much larger code size usage than necessary, if + they are disparaged too much here. So in general, if + use_helper_for_const_shift thinks a helper should be used, obey + that and don't disparage the shift any more than a regular + instruction, even though the shift may actually cost more. + This ensures that the RTL generated at the initial expand pass has the + expected shift instructions, which can be mapped to the helper + functions. */ + return msp430_costs (src, dst, speed, outer_rtx); + } + + if (!msp430x) + { + /* Each shift by one place will be emitted individually. */ + switch (dst_code) + { + case REG: + case CONST_INT: + return amt * cost_p->reg; + case MEM: + if (msp430_is_mem_indirect (dst)) + return amt * cost_p->ind; + else + return amt * cost_p->mem; + default: + return amt * cost_p->mem; + } + } + + /* RRAM, RRCM, RRUM, RLAM are used for shift counts <= 4, otherwise, the 'X' + versions are used. + Instructions which shift a MEM operand will never actually be output. It + will always be copied into a register to allow for efficient shifting. So + the cost just takes into account the cost of an additional copy in that + case. */ + return (amt <= 4 ? (speed ? amt : 1) : (speed ? amt + 1 : 2) + + (dst_code == REG ? 0 + : msp430_costs (dst, gen_rtx_REG (HImode, 10), speed, outer_rtx))); +} + +/* Given source operand SRC and destination operand DST from the MULT/DIV/MOD + RTX OUTER_RTX, return the cost of performing that operation, assuming + optimization for speed when SPEED is true. */ +static int +msp430_muldiv_costs (rtx src, rtx dst, bool speed, rtx outer_rtx, + machine_mode outer_mode) { - int code = GET_CODE (x); + enum rtx_code outer_code = GET_CODE (outer_rtx); + const struct msp430_multlib_costs *cost_p; + bool hwmult_16bit = (msp430_has_hwmult () && !(msp430_use_f5_series_hwmult () + || use_32bit_hwmult ())); + cost_p = (hwmult_16bit + ? &cycle_cost_multlib_32bit + : &cycle_cost_multlib_16bit); + + int factor = 1; + /* Only used in some calculations. */ + int mode_factor = 1; + if (outer_mode == SImode) + mode_factor = 2; + else if (outer_mode == PSImode) + /* PSImode multiplication is performed using SImode operands, so has extra + cost to factor in the conversions necessary before/after the + operation. */ + mode_factor = 3; + else if (outer_mode == DImode) + mode_factor = 4; + + if (!speed) + { + /* The codesize cost of using a helper function to perform the + multiplication or division cannot be accurately calculated, since the + cost depends on how many times the operation is performed in the + entire program. */ + if (outer_code != MULT) + /* Division is always expensive. */ + factor = 7; + else if (((hwmult_16bit && outer_mode != DImode) + || use_32bit_hwmult () || msp430_use_f5_series_hwmult ())) + /* When the hardware multiplier is available, only disparage + slightly. */ + factor = 2; + else + factor = 5; + return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx); + } + + /* When there is hardware multiply support, there is a relatively low, fixed + cycle cost to performing any multiplication, but when there is no hardware + multiply support it is very costly. That precise cycle cost has not been + calculated here. + Division is extra slow since it always uses a software library. + The 16-bit hardware multiply library cannot be used to produce 64-bit + results. */ + if (outer_code != MULT || !msp430_has_hwmult () + || (outer_mode == DImode && hwmult_16bit)) + { + factor = (outer_code == MULT ? 50 : 70); + return factor * mode_factor * msp430_costs (src, dst, speed, outer_rtx); + } + + switch (outer_mode) + { + case E_QImode: + case E_HImode: + /* Include the cost of copying the operands into and out of the hardware + multiply routine. */ + return cost_p->mulhi + (3 * msp430_costs (src, dst, speed, outer_rtx)); + + case E_PSImode: + /* Extra factor for the conversions necessary to do PSI->SI before the + operation. */ + factor = 2; + /* fallthru. */ + case E_SImode: + return factor * (cost_p->mulsi + + (6 * msp430_costs (src, dst, speed, outer_rtx))); + + case E_DImode: + default: + return cost_p->muldi + (12 * msp430_costs (src, dst, speed, outer_rtx)); + } +} +/* Recurse within X to find the actual destination operand of the expression. + For example: + (plus (ashift (minus (ashift (reg) + (const_int) ...... + should return the reg RTX. */ +static rtx +msp430_get_inner_dest_code (rtx x) +{ + enum rtx_code code = GET_CODE (x); + rtx op0 = XEXP (x, 0); switch (code) { - case SIGN_EXTEND: - if (mode == SImode && outer_code == SET) + case REG: + case SYMBOL_REF: + case CONST_INT: + case CONST: + case LABEL_REF: + return x; + + case MEM: + /* Return the MEM expr not the inner REG for these cases. */ + switch (GET_CODE (op0)) { - *total = COSTS_N_INSNS (4); - return true; + case REG: + case SYMBOL_REF: + case LABEL_REF: + case CONST: + case POST_INC: + return x; + + case PLUS: + /* return MEM (PLUS (REG) (CONST)) */ + if (GET_CODE (XEXP (op0, 0)) == REG) + { + if (GET_CODE (XEXP (op0, 1)) == CONST_INT + || GET_CODE (XEXP (op0, 1)) == CONST + || GET_CODE (XEXP (op0, 1)) == LABEL_REF + || GET_CODE (XEXP (op0, 1)) == SYMBOL_REF) + return x; + else + return msp430_get_inner_dest_code (op0); + } + return msp430_get_inner_dest_code (op0); + + default: + if (GET_RTX_FORMAT (code)[0] != 'e') + return x; + return msp430_get_inner_dest_code (op0); } break; + + default: + if (op0 == NULL_RTX) + gcc_unreachable (); + else + { + if (GET_RTX_FORMAT (code)[0] != 'e' + && code != ENTRY_VALUE) + return x; + return msp430_get_inner_dest_code (op0); + } + } +} + +/* Calculate the cost of an MSP430 single-operand instruction, for operand DST + within the RTX OUTER_RTX, optimizing for speed if SPEED is true. */ +static int +msp430_single_op_cost (rtx dst, bool speed, rtx outer_rtx) +{ + enum rtx_code dst_code = GET_CODE (dst); + const struct single_op_cost *cost_p; + const struct double_op_cost *double_op_cost_p; + + cost_p = (speed ? &cycle_cost_single_op : &size_cost_single_op); + double_op_cost_p = (speed ? &cycle_cost_double_op : &size_cost_double_op); + + switch (dst_code) + { + case REG: + return cost_p->reg; + case MEM: + if (msp430_is_mem_indirect (dst)) + return cost_p->ind; + else + return cost_p->mem; + + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + /* A constant value would need to be copied into a register first. */ + return double_op_cost_p->imm2r + cost_p->reg; + + default: + return cost_p->mem; + } +} + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS msp430_rtx_costs + +/* This target hook describes the relative costs of RTL expressions. + The function recurses to just before the lowest level of the expression, + when both of the operands of the expression can be examined at the same time. + This is because the cost of the expression depends on the specific + addressing mode combination of the operands. + The hook returns true when all subexpressions of X have been processed, and + false when rtx_cost should recurse. */ +static bool +msp430_rtx_costs (rtx x, + machine_mode mode, + int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, + int * total, + bool speed) +{ + enum rtx_code code = GET_CODE (x); + rtx dst, src; + rtx dst_inner, src_inner; + + *total = 0; + dst = XEXP (x, 0); + if (GET_RTX_LENGTH (code) == 1) + /* Some RTX that are single-op in GCC are double-op when translated to + MSP430 instructions e.g NOT, NEG, ZERO_EXTEND. */ + src = dst; + else + src = XEXP (x, 1); + + + switch (code) + { + case SET: + /* Ignoring SET improves codesize. */ + if (!speed) + return true; + /* fallthru. */ + case PLUS: + if (outer_code == MEM) + /* Do not add any cost for the plus itself, but recurse in case there + are more complicated RTX inside. */ + return false; + /* fallthru. */ + case MINUS: + case AND: + case IOR: + case XOR: + case NOT: + case ZERO_EXTEND: + case TRUNCATE: + case NEG: + case ZERO_EXTRACT: + case SIGN_EXTRACT: + case IF_THEN_ELSE: + dst_inner = msp430_get_inner_dest_code (dst); + src_inner = msp430_get_inner_dest_code (src); + *total = COSTS_N_INSNS (msp430_costs (src_inner, dst_inner, speed, x)); + if (mode == SImode) + *total *= 2; + if (mode == DImode) + *total *= 4; + return false; + + case ROTATE: + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + dst_inner = msp430_get_inner_dest_code (dst); + src_inner = msp430_get_inner_dest_code (src); + *total = COSTS_N_INSNS (msp430_shift_costs (src_inner, dst_inner, + speed, x)); + if (mode == SImode) + *total *= 2; + if (mode == DImode) + *total *= 4; + return false; + + case MULT: + case DIV: + case MOD: + case UDIV: + case UMOD: + dst_inner = msp430_get_inner_dest_code (dst); + src_inner = msp430_get_inner_dest_code (src); + *total = COSTS_N_INSNS (msp430_muldiv_costs (src_inner, dst_inner, speed, + x, mode)); + return false; + + case CALL: + case SIGN_EXTEND: + dst_inner = msp430_get_inner_dest_code (dst); + *total = COSTS_N_INSNS (msp430_single_op_cost (dst_inner, speed, x)); + if (mode == SImode) + *total *= 2; + if (mode == DImode) + *total *= 4; + return false; + + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + case LABEL_REF: + case REG: + case PC: + case POST_INC: + if (mode == SImode) + *total = COSTS_N_INSNS (2); + else if (mode == DImode) + *total = COSTS_N_INSNS (4); + return true; + + case MEM: + /* PSImode operands are expensive when in memory. */ + if (mode == PSImode) + *total = COSTS_N_INSNS (1); + else if (mode == SImode) + *total = COSTS_N_INSNS (2); + else if (mode == DImode) + *total = COSTS_N_INSNS (4); + /* Recurse into the MEM. */ + return false; + + case EQ: + case NE: + case GT: + case GTU: + case GE: + case GEU: + case LT: + case LTU: + case LE: + case LEU: + /* Conditions are mostly equivalent, changing their relative + costs has no effect. */ + return false; + + case ASM_OPERANDS: + case ASM_INPUT: + case CLOBBER: + case COMPARE: + case CONCAT: + case ENTRY_VALUE: + /* Other unhandled expressions. */ + return false; + + default: + return false; } - return false; } /* Function Entry and Exit */ @@ -2947,8 +3413,7 @@ msp430_expand_helper (rtx *operands, const char *helper_name, /* Return TRUE if the helper function should be used and FALSE if the shifts insns should be emitted inline. */ static bool -use_helper_for_const_shift (enum rtx_code code, machine_mode mode, - HOST_WIDE_INT amt) +use_helper_for_const_shift (machine_mode mode, HOST_WIDE_INT amt) { const int default_inline_shift = 4; /* We initialize the option to 65 so we know if the user set it or not. */ @@ -2959,6 +3424,9 @@ use_helper_for_const_shift (enum rtx_code code, machine_mode mode, the heuristic accordingly. */ int max_inline_32 = max_inline / 2; + if (mode == E_DImode) + return true; + /* Don't use helpers for these modes on 430X, when optimizing for speed, or when emitting a small number of insns. */ if ((mode == E_QImode || mode == E_HImode || mode == E_PSImode) @@ -2996,7 +3464,7 @@ msp430_expand_shift (enum rtx_code code, machine_mode mode, rtx *operands) constant. */ if (!CONST_INT_P (operands[2]) || mode == E_DImode - || use_helper_for_const_shift (code, mode, INTVAL (operands[2]))) + || use_helper_for_const_shift (mode, INTVAL (operands[2]))) { const char *helper_name = NULL; /* The const variants of mspabi shifts have significantly larger code |
