aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2025-07-30 12:34:56 +0200
committerGeorg-Johann Lay <avr@gjlay.de>2025-07-31 20:31:30 +0200
commit7d75e87f3051fa29d48e923016f04a8042a8ec53 (patch)
treebfa89b59bc0c3bb29a4a4624c21b80fef32b5ceb /gcc
parentf6462f664725844faa97ae7e8690e4fedee65788 (diff)
downloadgcc-7d75e87f3051fa29d48e923016f04a8042a8ec53.zip
gcc-7d75e87f3051fa29d48e923016f04a8042a8ec53.tar.gz
gcc-7d75e87f3051fa29d48e923016f04a8042a8ec53.tar.bz2
AVR: rtl-optimization/121340 - New mini-pass to undo superfluous moves from insn combine.
Insn combine may come up with superfluous reg-reg moves, where the combine people say that these are no problem since reg-alloc is supposed to optimize them. The issue is that the lower-subreg pass sitting between combine and reg-alloc may split such moves, coming up with a zoo of subregs which are only handled poorly by the register allocator. This patch adds a new avr mini-pass that handles such cases. As an example, take int f_ffssi (long x) { return __builtin_ffsl (x); } where the two functions have the same interface, i.e. there are no extra moves required for the argument or for the return value. However, $ avr-gcc -S -Os -dp -mno-fuse-move ... f_ffssi: mov r20,r22 ; 29 [c=4 l=1] movqi_insn/0 mov r21,r23 ; 30 [c=4 l=1] movqi_insn/0 mov r22,r24 ; 31 [c=4 l=1] movqi_insn/0 mov r23,r25 ; 32 [c=4 l=1] movqi_insn/0 mov r25,r23 ; 33 [c=4 l=4] *movsi/0 mov r24,r22 mov r23,r21 mov r22,r20 rcall __ffssi2 ; 34 [c=16 l=1] *ffssihi2.libgcc ret ; 37 [c=0 l=1] return where all the moves add up to a no-op. The -mno-fuse-move option stops any attempts by the avr backend to clean up that mess. PR rtl-optimization/121340 gcc/ * config/avr/avr.opt (-mfuse-move2): New option. * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine. * config/avr/avr-passes.cc (make_avr_pass_2moves): New function. (pass_data avr_pass_data_2moves): New static variable. (avr_pass_2moves): New rtl_opt_pass. * config/avr/avr-protos.h (make_avr_pass_2moves): New proto. * common/config/avr/avr-common.cc (default_options avr_option_optimization_table) <-mfuse-move2>: Set for -O1 and higher. * doc/invoke.texi (AVR Options) <-mfuse-move2>: Document.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/common/config/avr/avr-common.cc1
-rw-r--r--gcc/config/avr/avr-passes.cc139
-rw-r--r--gcc/config/avr/avr-passes.def8
-rw-r--r--gcc/config/avr/avr-protos.h1
-rw-r--r--gcc/config/avr/avr.opt4
-rw-r--r--gcc/doc/invoke.texi6
6 files changed, 158 insertions, 1 deletions
diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 203a965..d8b982c 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -38,6 +38,7 @@ static const struct default_options avr_option_optimization_table[] =
{ OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_mfuse_add_, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
+ { OPT_LEVELS_1_PLUS, OPT_mfuse_move2, NULL, 1 },
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 },
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 6a88a27..69df6d2 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4843,6 +4843,137 @@ avr_pass_fuse_add::execute1 (function *func)
//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+ avr_pass_2moves (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *func) final override
+ {
+ if (optimize && avropt_fuse_move2)
+ {
+ bool changed = false;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ changed |= optimize_2moves_bb (bb);
+ }
+
+ if (changed)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ }
+ }
+
+ return 0;
+ }
+
+ bool optimize_2moves (rtx_insn *, rtx_insn *);
+ bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+ bool changed = false;
+ rtx_insn *insn1 = nullptr;
+ rtx_insn *insn2 = nullptr;
+ rtx_insn *curr;
+
+ FOR_BB_INSNS (bb, curr)
+ {
+ if (insn1 && INSN_P (insn1)
+ && insn2 && INSN_P (insn2))
+ changed |= optimize_2moves (insn1, insn2);
+
+ insn1 = insn2;
+ insn2 = curr;
+ }
+
+ return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+ bool good = false;
+ bool bad = false;
+ rtx set1, dest1, src1;
+ rtx set2, dest2, src2;
+
+ if ((set1 = single_set (insn1))
+ && (set2 = single_set (insn2))
+ && (src1 = SET_SRC (set1))
+ && REG_P (src2 = SET_SRC (set2))
+ && REG_P (dest1 = SET_DEST (set1))
+ && REG_P (dest2 = SET_DEST (set2))
+ && rtx_equal_p (dest1, src2)
+ // Now we have:
+ // insn1: dest1 = src1
+ // insn2: dest2 = dest1
+ && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+ // Paranoia.
+ && GET_CODE (PATTERN (insn1)) != PARALLEL
+ && GET_CODE (PATTERN (insn2)) != PARALLEL
+ && (rtx_equal_p (dest2, src1)
+ || !reg_overlap_mentioned_p (dest2, src1)))
+ {
+ avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+ avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+ // Go check that dest1 is used exactly once, namely by insn2.
+
+ df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+ for (; use; use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *user = DF_REF_INSN (use);
+ avr_dump (" %d", INSN_UID (user));
+ good |= INSN_UID (user) == INSN_UID (insn2);
+ bad |= INSN_UID (user) != INSN_UID (insn2);
+ }
+ avr_dump (".\n");
+
+ if (good && !bad
+ // Propagate src1 to insn2:
+ // insn1: # Deleted
+ // insn2: dest2 = src1
+ && validate_change (insn2, &SET_SRC (set2), src1, false))
+ {
+ SET_INSN_DELETED (insn1);
+ return true;
+ }
+ }
+
+ if (good && !bad)
+ avr_dump (";; Failed\n");
+
+ return false;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
// Split insns with nonzero_bits() after combine.
static const pass_data avr_pass_data_split_nzb =
@@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+ return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
rtl_opt_pass *
make_avr_pass_split_nzb (gcc::context *ctxt)
{
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93..d668c7f 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+ people say that these are no problem since reg-alloc is supposed to optimize
+ them. The issue is that the lower-subreg pass sitting between combine and
+ reg-alloc may split such moves, coming up with a zoo of subregs which are
+ only handled poorly by the register allocator. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
/* Some combine insns have nonzero_bits() in their condition, though insns
should not use such stuff in their condition. Therefore, we split such
insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136..37911e7 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 9883119..7f6f18c 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -164,6 +164,10 @@ mfuse-move=
Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) Optimization IntegerRange(0, 23)
-mfuse-move=<0,23> Optimization. Run a post-reload pass that tweaks move instructions.
+mfuse-move2
+Target Var(avropt_fuse_move2) Init(0) Optimization
+Optimization. Fuse some move insns after insn combine.
+
mabsdata
Target Mask(ABSDATA)
Assume that all data in static storage can be accessed by LDS / STS instructions. This option is only useful for reduced Tiny devices like ATtiny40.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e442a9c..c1e708b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -915,7 +915,7 @@ Objective-C and Objective-C++ Dialects}.
@emph{AVR Options} (@ref{AVR Options})
@gccoptlist{-mmcu=@var{mcu} -mabsdata -maccumulate-args -mcvt
-mbranch-cost=@var{cost} -mfuse-add=@var{level} -mfuse-move=@var{level}
--mcall-prologues -mgas-isr-prologues -mint8 -mflmap
+-mfuse-move2 -mcall-prologues -mgas-isr-prologues -mint8 -mflmap
-mdouble=@var{bits} -mlong-double=@var{bits} -mno-call-main
-mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts
-mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack
@@ -25110,6 +25110,10 @@ Valid values for @var{level} are in the range @code{0} @dots{} @code{23}
which is a 3:2:2:2 mixed radix value. Each digit controls some
aspect of the optimization.
+@opindex mfuse-move2
+@item -mfuse-move2
+Run a post combine optimization pass that tries to fuse move instructions.
+
@opindex mstrict-X
@item -mstrict-X
Use address register @code{X} in a way proposed by the hardware. This means