diff options
author | Georg-Johann Lay <avr@gjlay.de> | 2012-09-28 08:21:06 +0000 |
---|---|---|
committer | Georg-Johann Lay <gjl@gcc.gnu.org> | 2012-09-28 08:21:06 +0000 |
commit | 3fd1e31dc5e6b1017a0a49c1b6d17c1e1e268b27 (patch) | |
tree | cb5b381dedfa92278ef86432edd69d0a6e334ac4 | |
parent | af710874e9402567635008a88279e9ab02227b2d (diff) | |
download | gcc-3fd1e31dc5e6b1017a0a49c1b6d17c1e1e268b27.zip gcc-3fd1e31dc5e6b1017a0a49c1b6d17c1e1e268b27.tar.gz gcc-3fd1e31dc5e6b1017a0a49c1b6d17c1e1e268b27.tar.bz2 |
re PR rtl-optimization/52543 (lower-subreg.c: code bloat of 300%-400% for multi-word memory splits)
PR rtl-optimization/52543
* config/avr/avr.c (avr_mode_dependent_address_p): Return true for
all non-generic address spaces.
(TARGET_SECONDARY_RELOAD): New hook define to...
(avr_secondary_reload): ...this new static function.
* config/avr/avr.md (reload_in<mode>): New insns.
Undo r185605 (mostly):
* config/avr/avr-protos.h (avr_load_lpm): Remove.
* config/avr/avr.c (avr_load_libgcc_p): Don't restrict to __flash loads.
(avr_out_lpm): Also handle loads > 1 byte.
(avr_load_lpm): Remove.
(avr_find_unused_d_reg): New static function.
(avr_out_lpm_no_lpmx): New static function.
(adjust_insn_length): Remove ADJUST_LEN_LOAD_LPM.
* config/avr/avr.md (unspec): Remove UNSPEC_LPM.
(load_<mode>_libgcc): Use MEM instead of UNSPEC_LPM.
(load_<mode>, load_<mode>_clobber): Remove.
(mov<mode>): For multi-byte move from non-generic
16-bit address spaces: Expand to *mov<mode> again.
(load<mode>_libgcc): New expander.
(split-lpmx): Remove split.
From-SVN: r191820
-rw-r--r-- | gcc/ChangeLog | 25 | ||||
-rw-r--r-- | gcc/config/avr/avr-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/avr/avr.c | 471 | ||||
-rw-r--r-- | gcc/config/avr/avr.md | 201 |
4 files changed, 463 insertions, 236 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 42deeba..fdabc44 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2012-09-28 Georg-Johann Lay <avr@gjlay.de> + + PR rtl-optimization/52543 + * config/avr/avr.c (avr_mode_dependent_address_p): Return true for + all non-generic address spaces. + (TARGET_SECONDARY_RELOAD): New hook define to... + (avr_secondary_reload): ...this new static function. + * config/avr/avr.md (reload_in<mode>): New insns. + + Undo r185605 (mostly): + * config/avr/avr-protos.h (avr_load_lpm): Remove. + * config/avr/avr.c (avr_load_libgcc_p): Don't restrict to __flash loads. + (avr_out_lpm): Also handle loads > 1 byte. + (avr_load_lpm): Remove. + (avr_find_unused_d_reg): New static function. + (avr_out_lpm_no_lpmx): New static function. + (adjust_insn_length): Remove ADJUST_LEN_LOAD_LPM. + * config/avr/avr.md (unspec): Remove UNSPEC_LPM. + (load_<mode>_libgcc): Use MEM instead of UNSPEC_LPM. + (load_<mode>, load_<mode>_clobber): Remove. + (mov<mode>): For multi-byte move from non-generic + 16-bit address spaces: Expand to *mov<mode> again. + (load<mode>_libgcc): New expander. + (split-lpmx): Remove split. + 2012-09-27 Dehao Chen <dehao@google.com> * tree.h (tree_constructor): Remove IS_UNKNOWN_LOCATION. diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 690c89b..91cac4c 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -75,8 +75,6 @@ extern const char *avr_out_ashlpsi3 (rtx, rtx*, int*); extern const char *avr_out_ashrpsi3 (rtx, rtx*, int*); extern const char *avr_out_lshrpsi3 (rtx, rtx*, int*); -extern const char* avr_load_lpm (rtx, rtx*, int*); - extern bool avr_rotate_bytes (rtx operands[]); extern const char* avr_out_fract (rtx, rtx[], bool, int*); diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 87ff531..c9740ba 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -1625,17 +1625,17 @@ avr_cannot_modify_jumps_p (void) /* Implement `TARGET_MODE_DEPENDENT_ADDRESS_P'. */ -/* FIXME: PSImode addresses are not mode-dependent in themselves. - This hook just serves to hack around PR rtl-optimization/52543 by - claiming that PSImode addresses (which are used for the 24-bit - address space __memx) were mode-dependent so that lower-subreg.s - will skip these addresses. See also the similar FIXME comment along - with mov<mode> expanders in avr.md. */ - static bool -avr_mode_dependent_address_p (const_rtx addr, addr_space_t as ATTRIBUTE_UNUSED) +avr_mode_dependent_address_p (const_rtx addr ATTRIBUTE_UNUSED, addr_space_t as) { - return GET_MODE (addr) != Pmode; + /* FIXME: Non-generic addresses are not mode-dependent in themselves. + This hook just serves to hack around PR rtl-optimization/52543 by + claiming that non-generic addresses were mode-dependent so that + lower-subreg.c will skip these addresses. lower-subreg.c sets up fake + RTXes to probe SET and MEM costs and assumes that MEM is always in the + generic address space which is not true. */ + + return !ADDR_SPACE_GENERIC_P (as); } @@ -1865,6 +1865,50 @@ avr_legitimize_reload_address (rtx *px, enum machine_mode mode, } +/* Implement `TARGET_SECONDARY_RELOAD' */ + +static reg_class_t +avr_secondary_reload (bool in_p, rtx x, + reg_class_t reload_class ATTRIBUTE_UNUSED, + enum machine_mode mode, secondary_reload_info *sri) +{ + if (in_p + && MEM_P (x) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) + && ADDR_SPACE_MEMX != MEM_ADDR_SPACE (x)) + { + /* For the non-generic 16-bit spaces we need a d-class scratch. */ + + switch (mode) + { + default: + gcc_unreachable(); + + case QImode: sri->icode = CODE_FOR_reload_inqi; break; + case QQmode: sri->icode = CODE_FOR_reload_inqq; break; + case UQQmode: sri->icode = CODE_FOR_reload_inuqq; break; + + case HImode: sri->icode = CODE_FOR_reload_inhi; break; + case HQmode: sri->icode = CODE_FOR_reload_inhq; break; + case HAmode: sri->icode = CODE_FOR_reload_inha; break; + case UHQmode: sri->icode = CODE_FOR_reload_inuhq; break; + case UHAmode: sri->icode = CODE_FOR_reload_inuha; break; + + case PSImode: sri->icode = CODE_FOR_reload_inpsi; break; + + case SImode: sri->icode = CODE_FOR_reload_insi; break; + case SFmode: sri->icode = CODE_FOR_reload_insf; break; + case SQmode: sri->icode = CODE_FOR_reload_insq; break; + case SAmode: sri->icode = CODE_FOR_reload_insa; break; + case USQmode: sri->icode = CODE_FOR_reload_inusq; break; + case USAmode: sri->icode = CODE_FOR_reload_inusa; break; + } + } + + return NO_REGS; +} + + /* Helper function to print assembler resp. track instruction sequence lengths. Always return "". @@ -2655,8 +2699,7 @@ avr_load_libgcc_p (rtx op) return (n_bytes > 2 && !AVR_HAVE_LPMX - && MEM_P (op) - && MEM_ADDR_SPACE (op) == ADDR_SPACE_FLASH); + && avr_mem_flash_p (op)); } /* Return true if a value of mode MODE is read by __xload_* function. */ @@ -2671,6 +2714,157 @@ avr_xload_libgcc_p (enum machine_mode mode) } +/* Fixme: This is a hack because secondary reloads don't works as expected. + + Find an unused d-register to be used as scratch in INSN. + EXCLUDE is either NULL_RTX or some register. In the case where EXCLUDE + is a register, skip all possible return values that overlap EXCLUDE. + The policy for the returned register is similar to that of + `reg_unused_after', i.e. the returned register may overlap the SET_DEST + of INSN. + + Return a QImode d-register or NULL_RTX if nothing found. */ + +static rtx +avr_find_unused_d_reg (rtx insn, rtx exclude) +{ + int regno; + bool isr_p = (avr_interrupt_function_p (current_function_decl) + || avr_signal_function_p (current_function_decl)); + + for (regno = 16; regno < 32; regno++) + { + rtx reg = all_regs_rtx[regno]; + + if ((exclude + && reg_overlap_mentioned_p (exclude, reg)) + || fixed_regs[regno]) + { + continue; + } + + /* Try non-live register */ + + if (!df_regs_ever_live_p (regno) + && (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main + || (!isr_p && call_used_regs[regno]))) + { + return reg; + } + + /* Any live register can be used if it is unused after. + Prologue/epilogue will care for it as needed. */ + + if (df_regs_ever_live_p (regno) + && reg_unused_after (insn, reg)) + { + return reg; + } + } + + return NULL_RTX; +} + + +/* Helper function for the next function in the case where only restricted + version of LPM instruction is available. */ + +static const char* +avr_out_lpm_no_lpmx (rtx insn, rtx *xop, int *plen) +{ + rtx dest = xop[0]; + rtx addr = xop[1]; + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int regno_dest; + + regno_dest = REGNO (dest); + + /* The implicit target register of LPM. */ + xop[3] = lpm_reg_rtx; + + switch (GET_CODE (addr)) + { + default: + gcc_unreachable(); + + case REG: + + gcc_assert (REG_Z == REGNO (addr)); + + switch (n_bytes) + { + default: + gcc_unreachable(); + + case 1: + avr_asm_len ("%4lpm", xop, plen, 1); + + if (regno_dest != LPM_REGNO) + avr_asm_len ("mov %0,%3", xop, plen, 1); + + return ""; + + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm" CR_TAB + "push %3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "pop %A0", xop, plen, 6); + + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3", xop, plen, 5); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + + break; /* 2 */ + } + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + if (regno_dest == LPM_REGNO) + avr_asm_len ("%4lpm" CR_TAB + "adiw %2,1", xop, plen, 2); + else + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 2) + avr_asm_len ("%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 3) + avr_asm_len ("%4lpm" CR_TAB + "mov %C0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 4) + avr_asm_len ("%4lpm" CR_TAB + "mov %D0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + return ""; +} + + /* If PLEN == NULL: Ouput instructions to load a value from a memory location OP[1] in AS1 to register OP[0]. If PLEN != 0 set *PLEN to the length in words of the instruction sequence. @@ -2679,11 +2873,13 @@ avr_xload_libgcc_p (enum machine_mode mode) static const char* avr_out_lpm (rtx insn, rtx *op, int *plen) { - rtx xop[3]; + rtx xop[7]; rtx dest = op[0]; rtx src = SET_SRC (single_set (insn)); rtx addr; int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int regno_dest; + int segment; RTX_CODE code; addr_space_t as = MEM_ADDR_SPACE (src); @@ -2704,18 +2900,56 @@ avr_out_lpm (rtx insn, rtx *op, int *plen) gcc_assert (REG_P (dest)); gcc_assert (REG == code || POST_INC == code); - /* Only 1-byte moves from __flash are representes as open coded - mov insns. All other loads from flash are not handled here but - by some UNSPEC instead, see respective FIXME in machine description. */ - - gcc_assert (as == ADDR_SPACE_FLASH); - gcc_assert (n_bytes == 1); - xop[0] = dest; - xop[1] = lpm_addr_reg_rtx; - xop[2] = lpm_reg_rtx; + xop[1] = addr; + xop[2] = lpm_addr_reg_rtx; + xop[4] = xstring_empty; + xop[5] = tmp_reg_rtx; + xop[6] = XEXP (rampz_rtx, 0); - switch (code) + regno_dest = REGNO (dest); + + segment = avr_addrspace[as].segment; + + /* Set RAMPZ as needed. */ + + if (segment) + { + xop[4] = GEN_INT (segment); + xop[3] = avr_find_unused_d_reg (insn, lpm_addr_reg_rtx); + + if (xop[3] != NULL_RTX) + { + avr_asm_len ("ldi %3,%4" CR_TAB + "out %i6,%3", xop, plen, 2); + } + else if (segment == 1) + { + avr_asm_len ("clr %5" CR_TAB + "inc %5" CR_TAB + "out %i6,%5", xop, plen, 3); + } + else + { + avr_asm_len ("mov %5,%2" CR_TAB + "ldi %2,%4" CR_TAB + "out %i6,%2" CR_TAB + "mov %2,%5", xop, plen, 4); + } + + xop[4] = xstring_e; + + if (!AVR_HAVE_ELPMX) + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + else if (!AVR_HAVE_LPMX) + { + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + + /* We have [E]LPMX: Output reading from Flash the comfortable way. */ + + switch (GET_CODE (addr)) { default: gcc_unreachable(); @@ -2723,105 +2957,85 @@ avr_out_lpm (rtx insn, rtx *op, int *plen) case REG: gcc_assert (REG_Z == REGNO (addr)); - - return AVR_HAVE_LPMX - ? avr_asm_len ("lpm %0,%a1", xop, plen, 1) - : avr_asm_len ("lpm" CR_TAB - "mov %0,%2", xop, plen, 2); - - case POST_INC: - - gcc_assert (REG_Z == REGNO (XEXP (addr, 0))); - return AVR_HAVE_LPMX - ? avr_asm_len ("lpm %0,%a1+", xop, plen, 1) - : avr_asm_len ("lpm" CR_TAB - "adiw %1, 1" CR_TAB - "mov %0,%2", xop, plen, 3); - } + switch (n_bytes) + { + default: + gcc_unreachable(); - return ""; -} + case 1: + return avr_asm_len ("%4lpm %0,%a2", xop, plen, 1); + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %B0,%a2" CR_TAB + "mov %A0,%5", xop, plen, 3); + else + { + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + } + + break; /* 2 */ -/* If PLEN == NULL: Ouput instructions to load $0 with a value from - flash address $1:Z. If $1 = 0 we can use LPM to read, otherwise - use ELPM. - If PLEN != 0 set *PLEN to the length in words of the instruction sequence. - Return "". */ + case 3: -const char* -avr_load_lpm (rtx insn, rtx *op, int *plen) -{ - rtx xop[4]; - int n, n_bytes = GET_MODE_SIZE (GET_MODE (op[0])); - rtx xsegment = op[1]; - bool clobber_z = PARALLEL == GET_CODE (PATTERN (insn)); - bool r30_in_tmp = false; - - if (plen) - *plen = 0; - - xop[1] = lpm_addr_reg_rtx; - xop[2] = lpm_reg_rtx; - xop[3] = xstring_empty; - - /* Set RAMPZ as needed. */ - - if (REG_P (xsegment)) - { - avr_asm_len ("out __RAMPZ__,%0", &xsegment, plen, 1); - xop[3] = xstring_e; - } - - /* Load the individual bytes from LSB to MSB. */ - - for (n = 0; n < n_bytes; n++) - { - xop[0] = all_regs_rtx[REGNO (op[0]) + n]; + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+" CR_TAB + "%4lpm %C0,%a2", xop, plen, 3); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,2", xop, plen, 1); + + break; /* 3 */ - if ((CONST_INT_P (xsegment) && AVR_HAVE_LPMX) - || (REG_P (xsegment) && AVR_HAVE_ELPMX)) - { - if (n == n_bytes-1) - avr_asm_len ("%3lpm %0,%a1", xop, plen, 1); - else if (REGNO (xop[0]) == REG_Z) + case 4: + + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+", xop, plen, 2); + + if (REGNO (dest) == REG_Z - 2) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %C0,%a2" CR_TAB + "mov %D0,%5", xop, plen, 3); + else { - avr_asm_len ("%3lpm %2,%a1+", xop, plen, 1); - r30_in_tmp = true; + avr_asm_len ("%4lpm %C0,%a2+" CR_TAB + "%4lpm %D0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,3", xop, plen, 1); } - else - avr_asm_len ("%3lpm %0,%a1+", xop, plen, 1); - } - else - { - gcc_assert (clobber_z); - - avr_asm_len ("%3lpm" CR_TAB - "mov %0,%2", xop, plen, 2); - if (n != n_bytes-1) - avr_asm_len ("adiw %1,1", xop, plen, 1); - } - } - - if (r30_in_tmp) - avr_asm_len ("mov %1,%2", xop, plen, 1); - - if (!clobber_z - && n_bytes > 1 - && !reg_unused_after (insn, lpm_addr_reg_rtx) - && !reg_overlap_mentioned_p (op[0], lpm_addr_reg_rtx)) - { - xop[2] = GEN_INT (n_bytes-1); - avr_asm_len ("sbiw %1,%2", xop, plen, 1); - } - - if (REG_P (xsegment) && AVR_HAVE_RAMPD) + break; /* 4 */ + } /* n_bytes */ + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + avr_asm_len ("%4lpm %A0,%a2+", xop, plen, 1); + if (n_bytes >= 2) avr_asm_len ("%4lpm %B0,%a2+", xop, plen, 1); + if (n_bytes >= 3) avr_asm_len ("%4lpm %C0,%a2+", xop, plen, 1); + if (n_bytes >= 4) avr_asm_len ("%4lpm %D0,%a2+", xop, plen, 1); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + if (xop[4] == xstring_e && AVR_HAVE_RAMPD) { /* Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM */ - - avr_asm_len ("out __RAMPZ__,__zero_reg__", xop, plen, 1); + + xop[0] = zero_reg_rtx; + avr_asm_len ("out %i6,%0", xop, plen, 1); } return ""; @@ -2857,7 +3071,7 @@ avr_out_xload (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen) const char* -output_movqi (rtx insn, rtx operands[], int *real_l) +output_movqi (rtx insn, rtx operands[], int *plen) { rtx dest = operands[0]; rtx src = operands[1]; @@ -2865,32 +3079,29 @@ output_movqi (rtx insn, rtx operands[], int *real_l) if (avr_mem_flash_p (src) || avr_mem_flash_p (dest)) { - return avr_out_lpm (insn, operands, real_l); + return avr_out_lpm (insn, operands, plen); } - if (real_l) - *real_l = 1; - gcc_assert (1 == GET_MODE_SIZE (GET_MODE (dest))); if (REG_P (dest)) { if (REG_P (src)) /* mov r,r */ - { - if (test_hard_reg_class (STACK_REG, dest)) - return "out %0,%1"; - else if (test_hard_reg_class (STACK_REG, src)) - return "in %0,%1"; - - return "mov %0,%1"; - } + { + if (test_hard_reg_class (STACK_REG, dest)) + return avr_asm_len ("out %0,%1", operands, plen, -1); + else if (test_hard_reg_class (STACK_REG, src)) + return avr_asm_len ("in %0,%1", operands, plen, -1); + + return avr_asm_len ("mov %0,%1", operands, plen, -1); + } else if (CONSTANT_P (src)) { - output_reload_in_const (operands, NULL_RTX, real_l, false); + output_reload_in_const (operands, NULL_RTX, plen, false); return ""; } else if (MEM_P (src)) - return out_movqi_r_mr (insn, operands, real_l); /* mov r,m */ + return out_movqi_r_mr (insn, operands, plen); /* mov r,m */ } else if (MEM_P (dest)) { @@ -2899,8 +3110,9 @@ output_movqi (rtx insn, rtx operands[], int *real_l) xop[0] = dest; xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? zero_reg_rtx : src; - return out_movqi_mr_r (insn, xop, real_l); + return out_movqi_mr_r (insn, xop, plen); } + return ""; } @@ -7314,7 +7526,6 @@ adjust_insn_length (rtx insn, int len) case ADJUST_LEN_MOV32: output_movsisf (insn, op, &len); break; case ADJUST_LEN_MOVMEM: avr_out_movmem (insn, op, &len); break; case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; - case ADJUST_LEN_LOAD_LPM: avr_load_lpm (insn, op, &len); break; case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break; case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break; @@ -10337,8 +10548,7 @@ avr_addr_space_pointer_mode (addr_space_t as) static bool avr_reg_ok_for_pgm_addr (rtx reg, bool strict) { - if (!REG_P (reg)) - return false; + gcc_assert (REG_P (reg)); if (strict) { @@ -11710,6 +11920,9 @@ avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg, #undef TARGET_MODE_DEPENDENT_ADDRESS_P #define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD avr_secondary_reload + #undef TARGET_PRINT_OPERAND #define TARGET_PRINT_OPERAND avr_print_operand #undef TARGET_PRINT_OPERAND_ADDRESS diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index b86f9b6..3340643 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -63,7 +63,6 @@ [UNSPEC_STRLEN UNSPEC_MOVMEM UNSPEC_INDEX_JMP - UNSPEC_LPM UNSPEC_FMUL UNSPEC_FMULS UNSPEC_FMULSU @@ -142,7 +141,7 @@ tsthi, tstpsi, tstsi, compare, compare64, call, mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, ufract, sfract, - xload, movmem, load_lpm, + xload, movmem, ashlqi, ashrqi, lshrqi, ashlhi, ashrhi, lshrhi, ashlsi, ashrsi, lshrsi, @@ -393,60 +392,57 @@ ;;======================================================================== ;; Move stuff around -;; Represent a load from __flash that needs libgcc support as UNSPEC. -;; This is legal because we read from non-changing memory. -;; For rationale see the FIXME below. - -;; "load_psi_libgcc" -;; "load_si_libgcc" -;; "load_sf_libgcc" -(define_insn "load_<mode>_libgcc" - [(set (reg:MOVMODE 22) - (unspec:MOVMODE [(reg:HI REG_Z)] - UNSPEC_LPM))] - "" - { - rtx n_bytes = GEN_INT (GET_MODE_SIZE (<MODE>mode)); - output_asm_insn ("%~call __load_%0", &n_bytes); - return ""; - } - [(set_attr "type" "xcall") - (set_attr "cc" "clobber")]) - - -;; Similar for inline reads from flash. We use UNSPEC instead -;; of MEM for the same reason as above: PR52543. -;; $1 contains the memory segment. - -(define_insn "load_<mode>" - [(set (match_operand:MOVMODE 0 "register_operand" "=r") - (unspec:MOVMODE [(reg:HI REG_Z) - (match_operand:QI 1 "reg_or_0_operand" "rL")] - UNSPEC_LPM))] - "(CONST_INT_P (operands[1]) && AVR_HAVE_LPMX) - || (REG_P (operands[1]) && AVR_HAVE_ELPMX)" +;; Secondary input reload from non-generic 16-bit address spaces +(define_insn "reload_in<mode>" + [(set (match_operand:MOVMODE 0 "register_operand" "=r") + (match_operand:MOVMODE 1 "memory_operand" "m")) + (clobber (match_operand:QI 2 "d_register_operand" "=d"))] + "MEM_P (operands[1]) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1]))" { - return avr_load_lpm (insn, operands, NULL); + return output_movqi (insn, operands, NULL); } - [(set_attr "adjust_len" "load_lpm") + [(set_attr "adjust_len" "mov8") (set_attr "cc" "clobber")]) -;; Similar to above for the complementary situation when there is no [E]LPMx. -;; Clobber Z in that case. +;; "loadqi_libgcc" +;; "loadhi_libgcc" +;; "loadpsi_libgcc" +;; "loadsi_libgcc" +;; "loadsf_libgcc" +(define_expand "load<mode>_libgcc" + [(set (match_dup 3) + (match_dup 2)) + (set (reg:MOVMODE 22) + (match_operand:MOVMODE 1 "memory_operand" "")) + (set (match_operand:MOVMODE 0 "register_operand" "") + (reg:MOVMODE 22))] + "avr_load_libgcc_p (operands[1])" + { + operands[3] = gen_rtx_REG (HImode, REG_Z); + operands[2] = force_operand (XEXP (operands[1], 0), NULL_RTX); + operands[1] = replace_equiv_address (operands[1], operands[3]); + set_mem_addr_space (operands[1], ADDR_SPACE_FLASH); + }) -(define_insn "load_<mode>_clobber" - [(set (match_operand:MOVMODE 0 "register_operand" "=r") - (unspec:MOVMODE [(reg:HI REG_Z) - (match_operand:QI 1 "reg_or_0_operand" "rL")] - UNSPEC_LPM)) - (clobber (reg:HI REG_Z))] - "!((CONST_INT_P (operands[1]) && AVR_HAVE_LPMX) - || (REG_P (operands[1]) && AVR_HAVE_ELPMX))" +;; "load_qi_libgcc" +;; "load_hi_libgcc" +;; "load_psi_libgcc" +;; "load_si_libgcc" +;; "load_sf_libgcc" +(define_insn "load_<mode>_libgcc" + [(set (reg:MOVMODE 22) + (match_operand:MOVMODE 0 "memory_operand" "m,m"))] + "avr_load_libgcc_p (operands[0]) + && REG_P (XEXP (operands[0], 0)) + && REG_Z == REGNO (XEXP (operands[0], 0))" { - return avr_load_lpm (insn, operands, NULL); + operands[0] = GEN_INT (GET_MODE_SIZE (<MODE>mode)); + return "%~call __load_%0"; } - [(set_attr "adjust_len" "load_lpm") + [(set_attr "length" "1,2") + (set_attr "isa" "rjmp,jmp") (set_attr "cc" "clobber")]) @@ -464,6 +460,10 @@ "&& 1" [(clobber (const_int 0))] { + /* ; Split away the high part of the address. GCC's register allocator + ; in not able to allocate segment registers and reload the resulting + ; expressions. Notice that no address register can hold a PSImode. */ + rtx insn, addr = XEXP (operands[1], 0); rtx hi8 = gen_reg_rtx (QImode); rtx reg_z = gen_rtx_REG (HImode, REG_Z); @@ -583,72 +583,29 @@ operands[1] = src = copy_to_mode_reg (<MODE>mode, src); } - if (avr_mem_memx_p (src)) - { - rtx addr = XEXP (src, 0); - - if (!REG_P (addr)) - src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr)); - - if (!avr_xload_libgcc_p (<MODE>mode)) - /* ; No <mode> here because gen_xload8<mode>_A only iterates over ALL1. - ; insn-emit does not depend on the mode, it' all about operands. */ - emit_insn (gen_xload8qi_A (dest, src)); - else - emit_insn (gen_xload<mode>_A (dest, src)); - - DONE; - } + if (avr_mem_memx_p (src)) + { + rtx addr = XEXP (src, 0); - /* For old devices without LPMx, prefer __flash loads per libcall. */ + if (!REG_P (addr)) + src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr)); - if (avr_load_libgcc_p (src)) - { - emit_move_insn (gen_rtx_REG (Pmode, REG_Z), - force_reg (Pmode, XEXP (src, 0))); + if (!avr_xload_libgcc_p (<MODE>mode)) + /* ; No <mode> here because gen_xload8<mode>_A only iterates over ALL1. + ; insn-emit does not depend on the mode, it's all about operands. */ + emit_insn (gen_xload8qi_A (dest, src)); + else + emit_insn (gen_xload<mode>_A (dest, src)); - emit_insn (gen_load_<mode>_libgcc ()); - emit_move_insn (dest, gen_rtx_REG (<MODE>mode, 22)); DONE; } - /* ; FIXME: Hack around PR rtl-optimization/52543. - ; lower-subreg.c splits loads from the 16-bit address spaces which - ; causes code bloat because each load need his setting of RAMPZ. - ; Moreover, the split will happen in such a way that the loads don't - ; take advantage of POST_INC addressing. Thus, we use UNSPEC to - ; represent these loads instead. Notice that this is legitimate - ; because the memory content does not change: Loads from the same - ; address will yield the same value. - ; POST_INC addressing would make the addresses mode_dependent and could - ; work around that PR, too. However, notice that it is *not* legitimate - ; to expand to POST_INC at expand time: The following passes assert - ; that pre-/post-modify addressing is introduced by .auto_inc_dec and - ; does not exist before that pass. */ - - if (avr_mem_flash_p (src) - && (GET_MODE_SIZE (<MODE>mode) > 1 - || MEM_ADDR_SPACE (src) != ADDR_SPACE_FLASH)) + if (avr_load_libgcc_p (src)) { - rtx xsegment = GEN_INT (avr_addrspace[MEM_ADDR_SPACE (src)].segment); - if (!AVR_HAVE_ELPM) - xsegment = const0_rtx; - if (xsegment != const0_rtx) - xsegment = force_reg (QImode, xsegment); - - emit_move_insn (gen_rtx_REG (Pmode, REG_Z), - force_reg (Pmode, XEXP (src, 0))); - - if ((CONST_INT_P (xsegment) && AVR_HAVE_LPMX) - || (REG_P (xsegment) && AVR_HAVE_ELPMX)) - emit_insn (gen_load_<mode> (dest, xsegment)); - else - emit_insn (gen_load_<mode>_clobber (dest, xsegment)); + /* For the small devices, do loads per libgcc call. */ + emit_insn (gen_load<mode>_libgcc (dest, src)); DONE; } - - /* ; The only address-space for which we use plain MEM and reload - ; machinery are 1-byte loads from __flash. */ }) ;;======================================================================== @@ -798,6 +755,40 @@ operands[5] = gen_rtx_REG (HImode, REGNO (operands[3])); }) +;; For LPM loads from AS1 we split +;; R = *Z +;; to +;; R = *Z++ +;; Z = Z - sizeof (R) +;; +;; so that the second instruction can be optimized out. + +(define_split ; "split-lpmx" + [(set (match_operand:HISI 0 "register_operand" "") + (match_operand:HISI 1 "memory_operand" ""))] + "reload_completed + && AVR_HAVE_LPMX" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 3) + (plus:HI (match_dup 3) + (match_dup 4)))] + { + rtx addr = XEXP (operands[1], 0); + + if (!avr_mem_flash_p (operands[1]) + || !REG_P (addr) + || reg_overlap_mentioned_p (addr, operands[0])) + { + FAIL; + } + + operands[2] = replace_equiv_address (operands[1], + gen_rtx_POST_INC (Pmode, addr)); + operands[3] = addr; + operands[4] = gen_int_mode (-GET_MODE_SIZE (<MODE>mode), HImode); + }) + ;;========================================================================== ;; xpointer move (24 bit) |