diff options
-rw-r--r-- | gcc/config/avr/avr-passes-fuse-move.h | 1 | ||||
-rw-r--r-- | gcc/config/avr/avr-passes.cc | 49 | ||||
-rw-r--r-- | gcc/config/avr/avr.md | 9 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 9 |
4 files changed, 63 insertions, 5 deletions
diff --git a/gcc/config/avr/avr-passes-fuse-move.h b/gcc/config/avr/avr-passes-fuse-move.h index dbed1a6..432f9ca 100644 --- a/gcc/config/avr/avr-passes-fuse-move.h +++ b/gcc/config/avr/avr-passes-fuse-move.h @@ -1172,6 +1172,7 @@ struct bbinfo_t static find_plies_data_t *fpd; static bool try_fuse_p; + static bool try_mem0_p; static bool try_bin_arg1_p; static bool try_simplify_p; static bool try_split_ldi_p; diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index de8de1c..fad64b1 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -434,6 +434,11 @@ static machine_mode size_to_mode (int size) Split all insns where the operation can be performed on individual bytes, like andsi3. In example (4) the andhi3 can be optimized to an andqi3. + + bbinfo_t::try_mem0_p + Try to fuse a mem = reg insn to mem = __zero_reg__. + This should only occur when -msplit-ldst is on, but may + also occur with pushes since push<mode>1 splits them. */ @@ -514,6 +519,7 @@ bool bbinfo_t::try_split_any_p; bool bbinfo_t::try_simplify_p; bool bbinfo_t::use_arith_p; bool bbinfo_t::use_set_some_p; +bool bbinfo_t::try_mem0_p; // Abstract Interpretation of expressions. @@ -1087,6 +1093,7 @@ struct optimize_data_t {} bool try_fuse (bbinfo_t *); + bool try_mem0 (bbinfo_t *); bool try_bin_arg1 (bbinfo_t *); bool try_simplify (bbinfo_t *); bool try_split_ldi (bbinfo_t *); @@ -2509,6 +2516,44 @@ bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const } +// Try to propagate __zero_reg__ to a mem = reg insn's source. +// Returns true on success and sets .n_new_insns. +bool +optimize_data_t::try_mem0 (bbinfo_t *) +{ + rtx_insn *insn = curr.ii.m_insn; + rtx set, mem, reg; + machine_mode mode; + + if (insn + && (set = single_set (insn)) + && MEM_P (mem = SET_DEST (set)) + && REG_P (reg = SET_SRC (set)) + && GET_MODE_SIZE (mode = GET_MODE (mem)) <= 4 + && END_REGNO (reg) <= REG_32 + && ! (regmask (reg) & memento_t::fixed_regs_mask) + && curr.regs.have_value (REGNO (reg), GET_MODE_SIZE (mode), 0x0)) + { + avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn), + mode, REGNO (reg)); + + // Some insns like PUSHes don't clobber REG_CC. + bool clobbers_cc = GET_CODE (PATTERN (insn)) == PARALLEL; + + if (clobbers_cc) + emit_valid_move_clobbercc (mem, CONST0_RTX (mode)); + else + emit_valid_insn (gen_rtx_SET (mem, CONST0_RTX (mode))); + + n_new_insns = 1; + + return true; + } + + return false; +} + + // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW). // Returns true on success, and sets .n_new_insns, .ignore_mask etc. bool @@ -3108,7 +3153,8 @@ bbinfo_t::optimize_one_block (bool &changed) || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this)) || (bbinfo_t::try_simplify_p && od.try_simplify (this)) || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this)) - || (bbinfo_t::try_split_any_p && od.try_split_any (this))); + || (bbinfo_t::try_split_any_p && od.try_split_any (this)) + || (bbinfo_t::try_mem0_p && od.try_mem0 (this))); rtx_insn *new_insns = get_insns (); end_sequence (); @@ -3193,6 +3239,7 @@ bbinfo_t::optimize_one_function (function *func) // Which optimization(s) to perform. bbinfo_t::try_fuse_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. + bbinfo_t::try_mem0_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. bbinfo_t::try_bin_arg1_p = avropt_fuse_move & 0x2; // Digit 1 in [0, 1]. bbinfo_t::try_split_any_p = avropt_fuse_move & 0x4; // Digit 2 in [0, 1]. bbinfo_t::try_split_ldi_p = avropt_fuse_move >> 3; // Digit 3 in [0, 2]. diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 36830012..9c348be 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -450,9 +450,11 @@ (define_insn "pushhi1_insn" [(set (mem:HI (post_dec:HI (reg:HI REG_SP))) - (match_operand:HI 0 "register_operand" "r"))] + (match_operand:HI 0 "reg_or_0_operand" "r,Y00"))] "" - "push %B0\;push %A0" + "@ + push %B0\;push %A0 + push __zero_reg__\;push __zero_reg__" [(set_attr "length" "2")]) ;; All modes for a multi-byte push. We must include complex modes here too, @@ -1029,6 +1031,9 @@ // provided non-volatile, addr-space = generic, no reg-overlap // and the resulting addressings are natively supported. if (avropt_split_ldst + // Splitting too early may obfuscate some PRE_DEC / POST_INC + // opportunities, thus only split after avr-fuse-add. + && n_avr_fuse_add_executed > 0 && GET_MODE_SIZE (<MODE>mode) > 1 && avr_split_ldst (operands)) DONE; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4b1acf9..78ead0e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -904,9 +904,9 @@ Objective-C and Objective-C++ Dialects}. -mbranch-cost=@var{cost} -mfuse-add=@var{level} -mfuse-move=@var{level} -mcall-prologues -mgas-isr-prologues -mint8 -mflmap -mdouble=@var{bits} -mlong-double=@var{bits} --mn_flash=@var{size} -mno-interrupts +-mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts -mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack --mrodata-in-ram -mfract-convert-truncate -msplit-bit-shift +-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls -mskip-bug -nodevicelib -nodevicespecs -Waddr-space-convert -Wmisspelled-isr} @@ -24374,6 +24374,11 @@ This optimization is turned on per default for @option{-O2} and higher, including @option{-Os} but excluding @option{-Oz}. Splitting of shifts with a constant offset that is a multiple of 8 is controlled by @option{-mfuse-move}. +@opindex msplit-ldst + +@item -msplit-ldst +Split multi-byte loads and stores into several byte loads and stores. +This optimization is turned on per default for @option{-O2} and higher. @opindex mtiny-stack @item -mtiny-stack |