author    Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>  2004-02-17 17:41:44 +0100
committer Zdenek Dvorak <rakdver@gcc.gnu.org>               2004-02-17 16:41:44 +0000
commit    50654f6c03917824e03777073557ac839cb56104 (patch)
tree      871928dcce64f79c8e877a86be241c2ed02c9cf3
parent    cc7ce44e4c87839efaaddd07d1f03cc50a78d047 (diff)
loop-iv.c: New file.
        * loop-iv.c: New file.
        * Makefile.in (loop-iv.o): New.
        * basic-block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
        * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
        num_loop_branches): New functions.
        * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
        iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
        find_simple_exit, iv_number_of_iterations, iv_analysis_done,
        get_simple_loop_desc, free_simple_loop_desc): Declare.
        (simple_loop_desc): New inline function.
        (struct rtx_iv, struct niter_desc): New.
        * cfgloopmanip.c (loopify): Specify semantics more precisely.
        * expr.c (force_operand): Handle subregs of expressions created by
        the loop unroller.
        * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
        parts of the initialization to toplev.c.
        * loop-unroll.c (loop_exit_at_end_p): New.
        (unroll_and_peel_loops): Call iv_analysis_done.
        (decide_peel_once_rolling, decide_peel_completely,
        decide_unroll_stupid, decide_unroll_constant_iterations,
        decide_unroll_runtime_iterations, decide_peel_simple,
        peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
        unroll_loop_runtime_iterations): Use new simple loop analysis.
        * loop-unswitch.c (compare_and_jump_seq): New.
        (may_unswitch_on_p): Renamed to ...
        (may_unswitch_on): ... this.  Use new iv analysis.
        (reversed_condition): Export.
        (unswitch_single_loop, unswitch_loop): Use new iv analysis.
        * predict.c (estimate_probability): Use new simple loop analysis.
        * rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
        canon_condition, simplify_using_condition): Declare.
        * stor-layout.c (get_mode_bounds): New.
        * toplev.c (rest_of_handle_loop2): Some parts of
        initialization/finalization moved here from loop-init.c.
From-SVN: r77951
-rw-r--r--gcc/ChangeLog37
-rw-r--r--gcc/Makefile.in4
-rw-r--r--gcc/basic-block.h11
-rw-r--r--gcc/cfgloop.c77
-rw-r--r--gcc/cfgloop.h110
-rw-r--r--gcc/cfgloopmanip.c14
-rw-r--r--gcc/expr.c14
-rw-r--r--gcc/loop-init.c26
-rw-r--r--gcc/loop-iv.c2465
-rw-r--r--gcc/loop-unroll.c356
-rw-r--r--gcc/loop-unswitch.c230
-rw-r--r--gcc/predict.c9
-rw-r--r--gcc/rtl.h11
-rw-r--r--gcc/stor-layout.c23
-rw-r--r--gcc/toplev.c11
15 files changed, 3174 insertions, 224 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 795460c..de24deb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+2004-02-17 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
+
+ * loop-iv.c: New file.
+ * Makefile.in (loop-iv.o): New.
+ * basic-block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
+ * cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
+ num_loop_branches): New functions.
+ * cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
+ iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
+ find_simple_exit, iv_number_of_iterations, iv_analysis_done,
+ get_simple_loop_desc, free_simple_loop_desc): Declare.
+ (simple_loop_desc): New inline function.
+ (struct rtx_iv, struct niter_desc): New.
+ * cfgloopmanip.c (loopify): Specify semantics more precisely.
+ * expr.c (force_operand): Handle subregs of expressions created by
+ loop unroller.
+ * loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
+ parts of the initialization to toplev.c.
+ * loop-unroll.c (loop_exit_at_end_p): New.
+ (unroll_and_peel_loops): Call iv_analysis_done.
+ (decide_peel_once_rolling, decide_peel_completely,
+ decide_unroll_stupid, decide_unroll_constant_iterations,
+ decide_unroll_runtime_iterations, decide_peel_simple,
+ peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
+ unroll_loop_runtime_iterations): Use new simple loop analysis.
+ * loop-unswitch.c (compare_and_jump_seq): New.
+ (may_unswitch_on_p): Renamed to ...
+ (may_unswitch_on): ... this.  Use new iv analysis.
+ (reversed_condition): Export.
+ (unswitch_single_loop, unswitch_loop): Use new iv analysis.
+ * predict.c (estimate_probability): Use new simple loop analysis.
+ * rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
+ canon_condition, simplify_using_condition): Declare.
+ * stor-layout.c (get_mode_bounds): New.
+ * toplev.c (rest_of_handle_loop2): Some parts of
+ initialization/finalization moved here from loop-init.c.
+
2004-02-17 Kazu Hirata <kazu@cs.umass.edu>
* config/h8300/h8300.h (FIXED_REGISTERS): Add the soft frame
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d30c4fa..06a70f2 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -848,7 +848,7 @@ OBJS-common = \
cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \
cfgrtl.o combine.o conflict.o convert.o coverage.o cse.o cselib.o \
dbxout.o debug.o df.o diagnostic.o dojump.o doloop.o dominance.o \
- dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o \
+ dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
@@ -1719,6 +1719,8 @@ cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h flags.h
cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
+loop-iv.o : loop-iv.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(GGC_H) \
+ $(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h $(EXPR_H) coretypes.h $(TM_H)
cfgloopmanip.o : cfgloopmanip.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h cfgloop.h cfglayout.h output.h coretypes.h $(TM_H)
loop-init.o : loop-init.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \
diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 240edfd..3f1775d 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -288,6 +288,17 @@ extern varray_type basic_block_info;
#define FOR_EACH_BB_REVERSE(BB) \
FOR_BB_BETWEEN (BB, EXIT_BLOCK_PTR->prev_bb, ENTRY_BLOCK_PTR, prev_bb)
+/* For iterating over insns in a basic block. */
+#define FOR_BB_INSNS(BB, INSN) \
+ for ((INSN) = BB_HEAD (BB); \
+ (INSN) != NEXT_INSN (BB_END (BB)); \
+ (INSN) = NEXT_INSN (INSN))
+
+#define FOR_BB_INSNS_REVERSE(BB, INSN) \
+ for ((INSN) = BB_END (BB); \
+ (INSN) != PREV_INSN (BB_HEAD (BB)); \
+ (INSN) = PREV_INSN (INSN))
+
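
As a quick illustration of the new iterators (a hypothetical fragment, not part of this patch, assuming GCC's usual RTL environment from rtl.h and basic-block.h), counting the real insns of a block could look like this:

  /* Hypothetical usage sketch: count the real (non-note) insns of BB
     using the new iterator.  */
  static unsigned
  count_bb_insns (basic_block bb)
  {
    rtx insn;
    unsigned n = 0;

    FOR_BB_INSNS (bb, insn)
      if (INSN_P (insn))
        n++;

    return n;
  }
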
/* Cycles through _all_ basic blocks, even the fake ones (entry and
exit block). */
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 6fa8b17..2be4b7c 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -959,6 +959,62 @@ get_loop_body (const struct loop *loop)
return tovisit;
}
+/* Fills the dominance descendants of basic block BB that lie inside LOOP
+ into the array TOVISIT, starting at index *TV. */
+
+static void
+fill_sons_in_loop (const struct loop *loop, basic_block bb,
+ basic_block *tovisit, int *tv)
+{
+ basic_block son, postpone = NULL;
+
+ tovisit[(*tv)++] = bb;
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ {
+ if (!flow_bb_inside_loop_p (loop, son))
+ continue;
+
+ if (dominated_by_p (CDI_DOMINATORS, loop->latch, son))
+ {
+ postpone = son;
+ continue;
+ }
+ fill_sons_in_loop (loop, son, tovisit, tv);
+ }
+
+ if (postpone)
+ fill_sons_in_loop (loop, postpone, tovisit, tv);
+}
+
+/* Gets the body of LOOP (which must be different from the outermost loop),
+ sorted by the dominance relation. Additionally, if a basic block s
+ dominates the latch, then only blocks dominated by s appear after it. */
+
+basic_block *
+get_loop_body_in_dom_order (const struct loop *loop)
+{
+ basic_block *tovisit;
+ int tv;
+
+ if (!loop->num_nodes)
+ abort ();
+
+ tovisit = xcalloc (loop->num_nodes, sizeof (basic_block));
+
+ if (loop->latch == EXIT_BLOCK_PTR)
+ abort ();
+
+ tv = 0;
+ fill_sons_in_loop (loop, loop->header, tovisit, &tv);
+
+ if (tv != (int) loop->num_nodes)
+ abort ();
+
+ return tovisit;
+}
+
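
The ordering rule of fill_sons_in_loop -- emit a dominator-tree node, then its children, postponing the unique child that dominates the latch to the very end -- can be demonstrated standalone. A minimal, self-contained sketch (hypothetical names, not part of this patch):

  #include <assert.h>
  #include <stddef.h>

  /* A toy dominator-tree node; dominates_latch marks the (at most one)
     child whose subtree dominates the loop latch.  */
  struct node
  {
    int id;
    int dominates_latch;
    struct node *children[4];
  };

  /* Emit N, then its children in order, postponing the child that
     dominates the latch, mirroring fill_sons_in_loop above.  */
  static void
  fill (struct node *n, int *out, int *pos)
  {
    struct node *postpone = NULL;
    int i;

    out[(*pos)++] = n->id;
    for (i = 0; i < 4 && n->children[i]; i++)
      {
        if (n->children[i]->dominates_latch)
          {
            postpone = n->children[i];
            continue;
          }
        fill (n->children[i], out, pos);
      }
    if (postpone)
      fill (postpone, out, pos);
  }

  int
  main (void)
  {
    struct node latchside = { 2, 1, { NULL } };
    struct node other = { 1, 0, { NULL } };
    struct node header = { 0, 0, { &latchside, &other, NULL } };
    int out[3], pos = 0;

    fill (&header, out, &pos);
    /* The latch-dominating block comes last even though it was the
       first child.  */
    assert (out[0] == 0 && out[1] == 1 && out[2] == 2);
    return 0;
  }
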
/* Gets exit edges of a LOOP, returning their number in N_EDGES. */
edge *
get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
@@ -988,6 +1044,27 @@ get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
return edges;
}
+/* Counts the number of conditional branches inside LOOP. */
+
+unsigned
+num_loop_branches (const struct loop *loop)
+{
+ unsigned i, n;
+ basic_block * body;
+
+ if (loop->latch == EXIT_BLOCK_PTR)
+ abort ();
+
+ body = get_loop_body (loop);
+ n = 0;
+ for (i = 0; i < loop->num_nodes; i++)
+ if (body[i]->succ && body[i]->succ->succ_next)
+ n++;
+ free (body);
+
+ return n;
+}
+
/* Adds basic block BB to LOOP. */
void
add_bb_to_loop (basic_block bb, struct loop *loop)
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 4d8c67d..58184b5 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -278,7 +278,9 @@ extern int average_num_loop_insns (struct loop *);
/* Loops & cfg manipulation. */
extern basic_block *get_loop_body (const struct loop *);
+extern basic_block *get_loop_body_in_dom_order (const struct loop *);
extern edge *get_loop_exit_edges (const struct loop *, unsigned *);
+extern unsigned num_loop_branches (const struct loop *);
extern edge loop_preheader_edge (const struct loop *);
extern edge loop_latch_edge (const struct loop *);
@@ -322,6 +324,114 @@ extern void unloop (struct loops *, struct loop *);
extern bool remove_path (struct loops *, edge);
extern edge split_loop_bb (basic_block, rtx);
+/* Induction variable analysis. */
+
+/* The description of an induction variable. Things are a bit complicated
+ due to the need to handle subregs and extends. The value of the object
+ described by it can be obtained as follows (all computations are done
+ in extend_mode):
+
+ The value in the i-th iteration is
+ delta + mult * extend_{extend_mode} (subreg_{mode} (base + i * step)).
+
+ If first_special is true, the value in the first iteration is
+ delta + mult * base.
+
+ If extend = NIL, first_special must be false, delta 0, mult 1 and the
+ value is
+ subreg_{mode} (base + i * step).
+
+ The get_iv_value function can be used to obtain these expressions.
+
+ ??? Add a third mode field that would specify the mode in which the
+ inner computation is done, so that it could differ from the outer
+ one? */
+
+struct rtx_iv
+{
+ /* Its base and step (mode of base and step is supposed to be extend_mode,
+ see the description above). */
+ rtx base, step;
+
+ /* The type of extend applied to it (SIGN_EXTEND, ZERO_EXTEND or NIL). */
+ enum rtx_code extend;
+
+ /* Operations applied in the extended mode. */
+ rtx delta, mult;
+
+ /* The mode it is extended to. */
+ enum machine_mode extend_mode;
+
+ /* The mode the variable iterates in. */
+ enum machine_mode mode;
+
+ /* Whether we have already filled the remaining fields. */
+ unsigned analysed : 1;
+
+ /* Whether the first iteration needs to be handled specially. */
+ unsigned first_special : 1;
+};
+
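
The value formula in the rtx_iv comment above can be emulated with plain integer arithmetic. A standalone sketch (not part of this patch) for the concrete case extend_mode = DImode (64 bits), mode = SImode (32 bits) and a ZERO_EXTEND:

  #include <assert.h>
  #include <stdint.h>

  /* value(i) = delta + mult * zero_extend_DI (subreg_SI (base + i * step)),
     modelled with uint64_t as DImode and uint32_t as SImode.  */
  static uint64_t
  iv_value_at (uint64_t base, uint64_t step, uint64_t delta,
               uint64_t mult, uint64_t i)
  {
    uint32_t narrow = (uint32_t) (base + i * step);  /* subreg_{SImode} */
    return delta + mult * (uint64_t) narrow;         /* extend, then ops */
  }

  int
  main (void)
  {
    /* An iv with base 10 and step 3, no extra operations.  */
    assert (iv_value_at (10, 3, 0, 1, 0) == 10);
    assert (iv_value_at (10, 3, 0, 1, 5) == 25);
    /* Wrap-around happens in the narrow (SImode) computation.  */
    assert (iv_value_at (0xffffffffu, 1, 0, 1, 1) == 0);
    return 0;
  }
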
+/* This should replace struct loop_desc. We keep this just so that we are
+ able to compare the results. */
+
+struct niter_desc
+{
+ /* The edge out of the loop. */
+ edge out_edge;
+
+ /* The other edge leading from the condition. */
+ edge in_edge;
+
+ /* True if we are able to say anything about the number of iterations of
+ the loop. */
+ bool simple_p;
+
+ /* True if the loop iterates a constant number of times. */
+ bool const_iter;
+
+ /* Number of iterations if constant. */
+ unsigned HOST_WIDEST_INT niter;
+
+ /* Upper bound on the number of iterations. */
+ unsigned HOST_WIDEST_INT niter_max;
+
+ /* Assumptions under which the rest of the information is valid. */
+ rtx assumptions;
+
+ /* Assumptions under which the loop ends before reaching the latch,
+ even if the value of niter_expr says otherwise. */
+ rtx noloop_assumptions;
+
+ /* Condition under which the loop is infinite. */
+ rtx infinite;
+
+ /* Whether the comparison is signed. */
+ bool signed_p;
+
+ /* The mode in which niter_expr should be computed. */
+ enum machine_mode mode;
+
+ /* The number of iterations of the loop. */
+ rtx niter_expr;
+};
+
+extern void iv_analysis_loop_init (struct loop *);
+extern rtx iv_get_reaching_def (rtx, rtx);
+extern bool iv_analyse (rtx, rtx, struct rtx_iv *);
+extern rtx get_iv_value (struct rtx_iv *, rtx);
+extern void find_simple_exit (struct loop *, struct niter_desc *);
+extern void iv_number_of_iterations (struct loop *, rtx, rtx,
+ struct niter_desc *);
+extern void iv_analysis_done (void);
+
+extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
+extern void free_simple_loop_desc (struct loop *loop);
+
+static inline struct niter_desc *
+simple_loop_desc (struct loop *loop)
+{
+ return loop->aux;
+}
+
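
A hedged sketch of how a consumer of this interface might report iteration counts (hypothetical fragment assuming the surrounding loop-optimizer context, not part of this patch):

  /* Hypothetical usage sketch: report what the analysis knows about
     the number of iterations of LOOP.  */
  static void
  report_niter (struct loop *loop, FILE *file)
  {
    struct niter_desc *desc = get_simple_loop_desc (loop);

    if (!desc->simple_p)
      fprintf (file, "loop %d: no simple exit found\n", loop->num);
    else if (desc->const_iter)
      fprintf (file, "loop %d: iterates %lu times\n",
               loop->num, (unsigned long) desc->niter);
    else
      fprintf (file, "loop %d: iteration count known only symbolically\n",
               loop->num);
  }
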
/* Loop optimizer initialization. */
extern struct loops *loop_optimizer_init (FILE *);
extern void loop_optimizer_finalize (struct loops *, FILE *);
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index 3b8bcec..35444ee 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -480,11 +480,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den)
accordingly. Everything between them plus LATCH_EDGE destination must
be dominated by HEADER_EDGE destination, and back-reachable from
LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
- SWITCH_BB->succ to original destination of LATCH_EDGE and
- SWITCH_BB->succ->succ_next to original destination of HEADER_EDGE.
+ FALLTHRU_EDGE (SWITCH_BB) to original destination of HEADER_EDGE and
+ BRANCH_EDGE (SWITCH_BB) to original destination of LATCH_EDGE.
Returns newly created loop. */
+
struct loop *
-loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block switch_bb)
+loopify (struct loops *loops, edge latch_edge, edge header_edge,
+ basic_block switch_bb)
{
basic_block succ_bb = latch_edge->dest;
basic_block pred_bb = header_edge->src;
@@ -509,13 +511,15 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, basic_block swi
/* Redirect edges. */
loop_redirect_edge (latch_edge, loop->header);
+ loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb);
+
loop_redirect_edge (header_edge, switch_bb);
- loop_redirect_edge (switch_bb->succ->succ_next, loop->header);
- loop_redirect_edge (switch_bb->succ, succ_bb);
+ loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header);
/* Update dominators. */
set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
+
set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
/* Compute new loop. */
diff --git a/gcc/expr.c b/gcc/expr.c
index a6d222e..223a36b 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -5588,6 +5588,20 @@ force_operand (rtx value, rtx target)
rtx subtarget = get_subtarget (target);
enum rtx_code code = GET_CODE (value);
+ /* Check for a subreg applied to an expression produced by the loop
+ optimizer. */
+ if (code == SUBREG
+ && GET_CODE (SUBREG_REG (value)) != REG
+ && GET_CODE (SUBREG_REG (value)) != MEM)
+ {
+ value = simplify_gen_subreg (GET_MODE (value),
+ force_reg (GET_MODE (SUBREG_REG (value)),
+ force_operand (SUBREG_REG (value),
+ NULL_RTX)),
+ GET_MODE (SUBREG_REG (value)),
+ SUBREG_BYTE (value));
+ code = GET_CODE (value);
+ }
+
/* Check for a PIC address load. */
if ((code == PLUS || code == MINUS)
&& XEXP (value, 0) == pic_offset_table_rtx
diff --git a/gcc/loop-init.c b/gcc/loop-init.c
index 0b882d9..19d53e1 100644
--- a/gcc/loop-init.c
+++ b/gcc/loop-init.c
@@ -36,9 +36,6 @@ loop_optimizer_init (FILE *dumpfile)
struct loops *loops = xcalloc (1, sizeof (struct loops));
edge e;
- /* Initialize structures for layout changes. */
- cfg_layout_initialize ();
-
/* Avoid annoying special cases of edges going to exit
block. */
for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
@@ -49,18 +46,11 @@ loop_optimizer_init (FILE *dumpfile)
if (flow_loops_find (loops, LOOP_TREE) <= 1)
{
- basic_block bb;
-
/* No loops. */
flow_loops_free (loops);
free_dominance_info (CDI_DOMINATORS);
free (loops);
- /* Make chain. */
- FOR_EACH_BB (bb)
- if (bb->next_bb != EXIT_BLOCK_PTR)
- bb->rbi->next = bb->next_bb;
- cfg_layout_finalize ();
return NULL;
}
@@ -94,13 +84,14 @@ loop_optimizer_init (FILE *dumpfile)
void
loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
{
- basic_block bb;
+ unsigned i;
- /* Finalize layout changes. */
- /* Make chain. */
- FOR_EACH_BB (bb)
- if (bb->next_bb != EXIT_BLOCK_PTR)
- bb->rbi->next = bb->next_bb;
+ if (!loops)
+ return;
+
+ for (i = 1; i < loops->num; i++)
+ if (loops->parray[i])
+ free_simple_loop_desc (loops->parray[i]);
/* Another dump. */
flow_loops_dump (loops, dumpfile, NULL, 1);
@@ -110,9 +101,6 @@ loop_optimizer_finalize (struct loops *loops, FILE *dumpfile)
free_dominance_info (CDI_DOMINATORS);
free (loops);
- /* Finalize changes. */
- cfg_layout_finalize ();
-
/* Checking. */
#ifdef ENABLE_CHECKING
verify_flow_info ();
diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
new file mode 100644
index 0000000..efdcc73
--- /dev/null
+++ b/gcc/loop-iv.c
@@ -0,0 +1,2465 @@
+/* Rtl-level induction variable analysis.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+/* This is just a very simplistic analysis of induction variables of the loop.
+ The major use is for determining the number of iterations of a loop for
+ loop unrolling, doloop optimization and branch prediction. For this we
+ are only interested in bivs and a fairly limited set of givs that are
+ needed in the exit condition. We also only compute the iv information on
+ demand.
+
+ First, the interesting registers are determined. A register is
+ interesting if
+
+ -- it is set only in the blocks that dominate the latch of the current
+ loop
+ -- all its sets are simple -- i.e. in a form we understand
+
+ We also number the insns sequentially in each basic block. For a use of
+ an interesting reg, it is now easy to find a reaching definition (there
+ may be only one).
+
+ An induction variable is then simply analysed by walking the use-def
+ chains.
+
+ Usage:
+
+ iv_analysis_loop_init (loop);
+ insn = iv_get_reaching_def (where, reg);
+ if (iv_analyse (insn, reg, &iv))
+ {
+ ...
+ }
+ iv_analysis_done (); */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "cfgloop.h"
+#include "expr.h"
+#include "output.h"
+
+/* The insn information. */
+
+struct insn_info
+{
+ /* Id of the insn. */
+ unsigned luid;
+
+ /* The previous definition of the register defined by the single
+ set in the insn. */
+ rtx prev_def;
+
+ /* The description of the iv. */
+ struct rtx_iv iv;
+};
+
+static struct insn_info *insn_info;
+
+/* The last definition of each register. */
+
+static rtx *last_def;
+
+/* The bivs. */
+
+static struct rtx_iv *bivs;
+
+/* Maximal insn number for which there is room in the insn_info array. */
+
+static unsigned max_insn_no;
+
+/* Maximal register number for which there is room in the bivs and
+ last_def arrays. */
+
+static unsigned max_reg_no;
+
+/* Dumps information about IV to FILE. */
+
+extern void dump_iv_info (FILE *, struct rtx_iv *);
+void
+dump_iv_info (FILE *file, struct rtx_iv *iv)
+{
+ if (!iv->base)
+ {
+ fprintf (file, "not simple");
+ return;
+ }
+
+ if (iv->step == const0_rtx)
+ {
+ fprintf (file, "invariant ");
+ print_rtl (file, iv->base);
+ return;
+ }
+
+ print_rtl (file, iv->base);
+ fprintf (file, " + ");
+ print_rtl (file, iv->step);
+ fprintf (file, " * iteration");
+ fprintf (file, " (in %s)", GET_MODE_NAME (iv->mode));
+
+ if (iv->mode != iv->extend_mode)
+ fprintf (file, " %s to %s",
+ rtx_name[iv->extend],
+ GET_MODE_NAME (iv->extend_mode));
+
+ if (iv->mult != const1_rtx)
+ {
+ fprintf (file, " * ");
+ print_rtl (file, iv->mult);
+ }
+ if (iv->delta != const0_rtx)
+ {
+ fprintf (file, " + ");
+ print_rtl (file, iv->delta);
+ }
+ if (iv->first_special)
+ fprintf (file, " (first special)");
+}
+
+/* Assigns luids to insns in basic block BB. */
+
+static void
+assign_luids (basic_block bb)
+{
+ unsigned i = 0, uid;
+ rtx insn;
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ uid = INSN_UID (insn);
+ insn_info[uid].luid = i++;
+ insn_info[uid].prev_def = NULL_RTX;
+ insn_info[uid].iv.analysed = false;
+ }
+}
+
+/* Generates a subreg to get the least significant part of EXPR (in mode
+ INNER_MODE) to OUTER_MODE. */
+
+static rtx
+lowpart_subreg (enum machine_mode outer_mode, rtx expr,
+ enum machine_mode inner_mode)
+{
+ return simplify_gen_subreg (outer_mode, expr, inner_mode,
+ subreg_lowpart_offset (outer_mode, inner_mode));
+}
+
+/* Checks whether REG is a well-behaved register. */
+
+static bool
+simple_reg_p (rtx reg)
+{
+ unsigned r;
+
+ if (GET_CODE (reg) == SUBREG)
+ {
+ if (!subreg_lowpart_p (reg))
+ return false;
+ reg = SUBREG_REG (reg);
+ }
+
+ if (!REG_P (reg))
+ return false;
+
+ r = REGNO (reg);
+ if (HARD_REGISTER_NUM_P (r))
+ return false;
+
+ if (GET_MODE_CLASS (GET_MODE (reg)) != MODE_INT)
+ return false;
+
+ if (last_def[r] == const0_rtx)
+ return false;
+
+ return true;
+}
+
+/* Checks whether assignment LHS = RHS is simple enough for us to process. */
+
+static bool
+simple_set_p (rtx lhs, rtx rhs)
+{
+ rtx op0, op1;
+
+ if (!REG_P (lhs)
+ || !simple_reg_p (lhs))
+ return false;
+
+ if (CONSTANT_P (rhs))
+ return true;
+
+ switch (GET_CODE (rhs))
+ {
+ case SUBREG:
+ case REG:
+ return simple_reg_p (rhs);
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ case NEG:
+ return simple_reg_p (XEXP (rhs, 0));
+
+ case PLUS:
+ case MINUS:
+ case MULT:
+ op0 = XEXP (rhs, 0);
+ op1 = XEXP (rhs, 1);
+
+ if (!simple_reg_p (op0)
+ && !CONSTANT_P (op0))
+ return false;
+
+ if (!simple_reg_p (op1)
+ && !CONSTANT_P (op1))
+ return false;
+
+ if (GET_CODE (rhs) == MULT
+ && !CONSTANT_P (op0)
+ && !CONSTANT_P (op1))
+ return false;
+
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Mark single SET in INSN. */
+
+static rtx
+mark_single_set (rtx insn, rtx set)
+{
+ rtx def = SET_DEST (set), src;
+ unsigned regno, uid;
+
+ src = find_reg_equal_equiv_note (insn);
+ if (!src)
+ src = SET_SRC (set);
+
+ if (!simple_set_p (SET_DEST (set), src))
+ return NULL_RTX;
+
+ regno = REGNO (def);
+ uid = INSN_UID (insn);
+
+ bivs[regno].analysed = false;
+ insn_info[uid].prev_def = last_def[regno];
+ last_def[regno] = insn;
+
+ return def;
+}
+
+/* Invalidate register REG unless it is equal to EXCEPT. */
+
+static void
+kill_sets (rtx reg, rtx by ATTRIBUTE_UNUSED, void *except)
+{
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+ if (!REG_P (reg))
+ return;
+ if (reg == except)
+ return;
+
+ last_def[REGNO (reg)] = const0_rtx;
+}
+
+/* Marks sets in basic block BB. If DOM is true, BB dominates the loop
+ latch. */
+
+static void
+mark_sets (basic_block bb, bool dom)
+{
+ rtx insn, set, def;
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ if (dom
+ && (set = single_set (insn)))
+ def = mark_single_set (insn, set);
+ else
+ def = NULL_RTX;
+
+ note_stores (PATTERN (insn), kill_sets, def);
+ }
+}
+
+/* Prepare the data for an induction variable analysis of a LOOP. */
+
+void
+iv_analysis_loop_init (struct loop *loop)
+{
+ basic_block *body = get_loop_body_in_dom_order (loop);
+ unsigned b;
+
+ if ((unsigned) get_max_uid () >= max_insn_no)
+ {
+ /* Add some reserve for insns and registers produced in optimizations. */
+ max_insn_no = get_max_uid () + 100;
+ if (insn_info)
+ free (insn_info);
+ insn_info = xmalloc (max_insn_no * sizeof (struct insn_info));
+ }
+
+ if ((unsigned) max_reg_num () >= max_reg_no)
+ {
+ max_reg_no = max_reg_num () + 100;
+ if (last_def)
+ free (last_def);
+ last_def = xmalloc (max_reg_no * sizeof (rtx));
+ if (bivs)
+ free (bivs);
+ bivs = xmalloc (max_reg_no * sizeof (struct rtx_iv));
+ }
+
+ memset (last_def, 0, max_reg_num () * sizeof (rtx));
+
+ for (b = 0; b < loop->num_nodes; b++)
+ {
+ assign_luids (body[b]);
+ mark_sets (body[b], just_once_each_iteration_p (loop, body[b]));
+ }
+
+ free (body);
+}
+
+/* Gets definition of REG reaching the INSN. If REG is not simple, const0_rtx
+ is returned. If INSN is before the first def in the loop, NULL_RTX is
+ returned. */
+
+rtx
+iv_get_reaching_def (rtx insn, rtx reg)
+{
+ unsigned regno, luid, auid;
+ rtx ainsn;
+ basic_block bb, abb;
+
+ if (GET_CODE (reg) == SUBREG)
+ {
+ if (!subreg_lowpart_p (reg))
+ return const0_rtx;
+ reg = SUBREG_REG (reg);
+ }
+ if (!REG_P (reg))
+ return NULL_RTX;
+
+ regno = REGNO (reg);
+ if (!last_def[regno]
+ || last_def[regno] == const0_rtx)
+ return last_def[regno];
+
+ bb = BLOCK_FOR_INSN (insn);
+ luid = insn_info[INSN_UID (insn)].luid;
+
+ ainsn = last_def[regno];
+ while (1)
+ {
+ abb = BLOCK_FOR_INSN (ainsn);
+
+ if (dominated_by_p (CDI_DOMINATORS, bb, abb))
+ break;
+
+ auid = INSN_UID (ainsn);
+ ainsn = insn_info[auid].prev_def;
+
+ if (!ainsn)
+ return NULL_RTX;
+ }
+
+ while (1)
+ {
+ abb = BLOCK_FOR_INSN (ainsn);
+ if (abb != bb)
+ return ainsn;
+
+ auid = INSN_UID (ainsn);
+ if (luid > insn_info[auid].luid)
+ return ainsn;
+
+ ainsn = insn_info[auid].prev_def;
+ if (!ainsn)
+ return NULL_RTX;
+ }
+}
+
+/* Sets IV to invariant CST in MODE. Always returns true (just for
+ consistency with other iv manipulation functions that may fail). */
+
+static bool
+iv_constant (struct rtx_iv *iv, rtx cst, enum machine_mode mode)
+{
+ if (mode == VOIDmode)
+ mode = GET_MODE (cst);
+
+ iv->analysed = true;
+ iv->mode = mode;
+ iv->base = cst;
+ iv->step = const0_rtx;
+ iv->first_special = false;
+ iv->extend = NIL;
+ iv->extend_mode = iv->mode;
+ iv->delta = const0_rtx;
+ iv->mult = const1_rtx;
+
+ return true;
+}
+
+/* Evaluates application of subreg to MODE on IV. */
+
+static bool
+iv_subreg (struct rtx_iv *iv, enum machine_mode mode)
+{
+ if (iv->extend_mode == mode)
+ return true;
+
+ if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (iv->mode))
+ return false;
+
+ iv->extend = NIL;
+ iv->mode = mode;
+
+ iv->base = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta,
+ simplify_gen_binary (MULT, iv->extend_mode,
+ iv->base, iv->mult));
+ iv->step = simplify_gen_binary (MULT, iv->extend_mode, iv->step, iv->mult);
+ iv->mult = const1_rtx;
+ iv->delta = const0_rtx;
+ iv->first_special = false;
+
+ return true;
+}
+
+/* Evaluates application of EXTEND to MODE on IV. */
+
+static bool
+iv_extend (struct rtx_iv *iv, enum rtx_code extend, enum machine_mode mode)
+{
+ if (mode != iv->extend_mode)
+ return false;
+
+ if (iv->extend != NIL
+ && iv->extend != extend)
+ return false;
+
+ iv->extend = extend;
+
+ return true;
+}
+
+/* Evaluates negation of IV. */
+
+static bool
+iv_neg (struct rtx_iv *iv)
+{
+ if (iv->extend == NIL)
+ {
+ iv->base = simplify_gen_unary (NEG, iv->extend_mode,
+ iv->base, iv->extend_mode);
+ iv->step = simplify_gen_unary (NEG, iv->extend_mode,
+ iv->step, iv->extend_mode);
+ }
+ else
+ {
+ iv->delta = simplify_gen_unary (NEG, iv->extend_mode,
+ iv->delta, iv->extend_mode);
+ iv->mult = simplify_gen_unary (NEG, iv->extend_mode,
+ iv->mult, iv->extend_mode);
+ }
+
+ return true;
+}
+
+/* Evaluates addition or subtraction (according to OP) of IV1 to IV0. */
+
+static bool
+iv_add (struct rtx_iv *iv0, struct rtx_iv *iv1, enum rtx_code op)
+{
+ enum machine_mode mode;
+ rtx arg;
+
+ /* Extend the constant to the extend_mode of the other operand if
+ necessary. */
+ if (iv0->extend == NIL
+ && iv0->mode == iv0->extend_mode
+ && iv0->step == const0_rtx
+ && GET_MODE_SIZE (iv0->extend_mode) < GET_MODE_SIZE (iv1->extend_mode))
+ {
+ iv0->extend_mode = iv1->extend_mode;
+ iv0->base = simplify_gen_unary (ZERO_EXTEND, iv0->extend_mode,
+ iv0->base, iv0->mode);
+ }
+ if (iv1->extend == NIL
+ && iv1->mode == iv1->extend_mode
+ && iv1->step == const0_rtx
+ && GET_MODE_SIZE (iv1->extend_mode) < GET_MODE_SIZE (iv0->extend_mode))
+ {
+ iv1->extend_mode = iv0->extend_mode;
+ iv1->base = simplify_gen_unary (ZERO_EXTEND, iv1->extend_mode,
+ iv1->base, iv1->mode);
+ }
+
+ mode = iv0->extend_mode;
+ if (mode != iv1->extend_mode)
+ return false;
+
+ if (iv0->extend == NIL && iv1->extend == NIL)
+ {
+ if (iv0->mode != iv1->mode)
+ return false;
+
+ iv0->base = simplify_gen_binary (op, mode, iv0->base, iv1->base);
+ iv0->step = simplify_gen_binary (op, mode, iv0->step, iv1->step);
+
+ return true;
+ }
+
+ /* Handle addition of constant. */
+ if (iv1->extend == NIL
+ && iv1->mode == mode
+ && iv1->step == const0_rtx)
+ {
+ iv0->delta = simplify_gen_binary (op, mode, iv0->delta, iv1->base);
+ return true;
+ }
+
+ if (iv0->extend == NIL
+ && iv0->mode == mode
+ && iv0->step == const0_rtx)
+ {
+ arg = iv0->base;
+ *iv0 = *iv1;
+ if (op == MINUS
+ && !iv_neg (iv0))
+ return false;
+
+ iv0->delta = simplify_gen_binary (PLUS, mode, iv0->delta, arg);
+ return true;
+ }
+
+ return false;
+}
+
+/* Evaluates multiplication of IV by constant CST. */
+
+static bool
+iv_mult (struct rtx_iv *iv, rtx mby)
+{
+ enum machine_mode mode = iv->extend_mode;
+
+ if (GET_MODE (mby) != VOIDmode
+ && GET_MODE (mby) != mode)
+ return false;
+
+ if (iv->extend == NIL)
+ {
+ iv->base = simplify_gen_binary (MULT, mode, iv->base, mby);
+ iv->step = simplify_gen_binary (MULT, mode, iv->step, mby);
+ }
+ else
+ {
+ iv->delta = simplify_gen_binary (MULT, mode, iv->delta, mby);
+ iv->mult = simplify_gen_binary (MULT, mode, iv->mult, mby);
+ }
+
+ return true;
+}
+
+/* The recursive part of get_biv_step. Gets the value of the register
+ defined by the single set in INSN with respect to the initial value of
+ REG inside the loop, in the shape described at get_biv_step. */
+
+static bool
+get_biv_step_1 (rtx insn, rtx reg,
+ rtx *inner_step, enum machine_mode *inner_mode,
+ enum rtx_code *extend, enum machine_mode outer_mode,
+ rtx *outer_step)
+{
+ rtx set, lhs, rhs, op0 = NULL_RTX, op1 = NULL_RTX;
+ rtx next, nextr, def_insn, tmp;
+ enum rtx_code code;
+
+ set = single_set (insn);
+ rhs = find_reg_equal_equiv_note (insn);
+ if (!rhs)
+ rhs = SET_SRC (set);
+ lhs = SET_DEST (set);
+
+ code = GET_CODE (rhs);
+ switch (code)
+ {
+ case SUBREG:
+ case REG:
+ next = rhs;
+ break;
+
+ case PLUS:
+ case MINUS:
+ op0 = XEXP (rhs, 0);
+ op1 = XEXP (rhs, 1);
+
+ if (code == PLUS && CONSTANT_P (op0))
+ {
+ tmp = op0; op0 = op1; op1 = tmp;
+ }
+
+ if (!simple_reg_p (op0)
+ || !CONSTANT_P (op1))
+ return false;
+
+ if (GET_MODE (rhs) != outer_mode)
+ {
+ /* ppc64 uses expressions like
+
+ (set x:SI (plus:SI (subreg:SI y:DI) 1)).
+
+ This is equivalent to
+
+ (set x':DI (plus:DI y:DI 1))
+ (set x:SI (subreg:SI x':DI)). */
+ if (GET_CODE (op0) != SUBREG)
+ return false;
+ if (GET_MODE (SUBREG_REG (op0)) != outer_mode)
+ return false;
+ }
+
+ next = op0;
+ break;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (GET_MODE (rhs) != outer_mode)
+ return false;
+
+ op0 = XEXP (rhs, 0);
+ if (!simple_reg_p (op0))
+ return false;
+
+ next = op0;
+ break;
+
+ default:
+ return false;
+ }
+
+ if (GET_CODE (next) == SUBREG)
+ {
+ if (!subreg_lowpart_p (next))
+ return false;
+
+ nextr = SUBREG_REG (next);
+ if (GET_MODE (nextr) != outer_mode)
+ return false;
+ }
+ else
+ nextr = next;
+
+ def_insn = iv_get_reaching_def (insn, nextr);
+ if (def_insn == const0_rtx)
+ return false;
+
+ if (!def_insn)
+ {
+ if (!rtx_equal_p (nextr, reg))
+ return false;
+
+ *inner_step = const0_rtx;
+ *extend = NIL;
+ *inner_mode = outer_mode;
+ *outer_step = const0_rtx;
+ }
+ else if (!get_biv_step_1 (def_insn, reg,
+ inner_step, inner_mode, extend, outer_mode,
+ outer_step))
+ return false;
+
+ if (GET_CODE (next) == SUBREG)
+ {
+ enum machine_mode amode = GET_MODE (next);
+
+ if (GET_MODE_SIZE (amode) > GET_MODE_SIZE (*inner_mode))
+ return false;
+
+ *inner_mode = amode;
+ *inner_step = simplify_gen_binary (PLUS, outer_mode,
+ *inner_step, *outer_step);
+ *outer_step = const0_rtx;
+ *extend = NIL;
+ }
+
+ switch (code)
+ {
+ case REG:
+ case SUBREG:
+ break;
+
+ case PLUS:
+ case MINUS:
+ if (*inner_mode == outer_mode
+ /* See comment in previous switch. */
+ || GET_MODE (rhs) != outer_mode)
+ *inner_step = simplify_gen_binary (code, outer_mode,
+ *inner_step, op1);
+ else
+ *outer_step = simplify_gen_binary (code, outer_mode,
+ *outer_step, op1);
+ break;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (GET_MODE (op0) != *inner_mode
+ || *extend != NIL
+ || *outer_step != const0_rtx)
+ abort ();
+
+ *extend = code;
+ break;
+
+ default:
+ abort ();
+ }
+
+ return true;
+}
+
+/* Gets the operation on register REG inside loop, in shape
+
+ OUTER_STEP + EXTEND_{OUTER_MODE} (SUBREG_{INNER_MODE} (REG + INNER_STEP))
+
+ If the operation cannot be described in this shape, return false. */
+
+static bool
+get_biv_step (rtx reg, rtx *inner_step, enum machine_mode *inner_mode,
+ enum rtx_code *extend, enum machine_mode *outer_mode,
+ rtx *outer_step)
+{
+ *outer_mode = GET_MODE (reg);
+
+ if (!get_biv_step_1 (last_def[REGNO (reg)], reg,
+ inner_step, inner_mode, extend, *outer_mode,
+ outer_step))
+ return false;
+
+ if (*inner_mode != *outer_mode
+ && *extend == NIL)
+ abort ();
+
+ if (*inner_mode == *outer_mode
+ && *extend != NIL)
+ abort ();
+
+ if (*inner_mode == *outer_mode
+ && *outer_step != const0_rtx)
+ abort ();
+
+ return true;
+}
+
+/* Determines whether DEF is a biv and if so, stores its description
+ to *IV. */
+
+static bool
+iv_analyse_biv (rtx def, struct rtx_iv *iv)
+{
+ unsigned regno;
+ rtx inner_step, outer_step;
+ enum machine_mode inner_mode, outer_mode;
+ enum rtx_code extend;
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, "Analysing ");
+ print_rtl (rtl_dump_file, def);
+ fprintf (rtl_dump_file, " for bivness.\n");
+ }
+
+ if (!REG_P (def))
+ {
+ if (!CONSTANT_P (def))
+ return false;
+
+ return iv_constant (iv, def, VOIDmode);
+ }
+
+ regno = REGNO (def);
+ if (last_def[regno] == const0_rtx)
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " not simple.\n");
+ return false;
+ }
+
+ if (last_def[regno] && bivs[regno].analysed)
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " already analysed.\n");
+
+ *iv = bivs[regno];
+ return iv->base != NULL_RTX;
+ }
+
+ if (!last_def[regno])
+ {
+ iv_constant (iv, def, VOIDmode);
+ goto end;
+ }
+
+ iv->analysed = true;
+ if (!get_biv_step (def, &inner_step, &inner_mode, &extend,
+ &outer_mode, &outer_step))
+ {
+ iv->base = NULL_RTX;
+ goto end;
+ }
+
+ /* The loop transforms base to es (base + inner_step) + outer_step,
+ where es means the extend-of-subreg operation between inner_mode and
+ outer_mode. The corresponding induction variable is
+
+ es ((base - outer_step) + i * (inner_step + outer_step)) + outer_step */
+
+ iv->base = simplify_gen_binary (MINUS, outer_mode, def, outer_step);
+ iv->step = simplify_gen_binary (PLUS, outer_mode, inner_step, outer_step);
+ iv->mode = inner_mode;
+ iv->extend_mode = outer_mode;
+ iv->extend = extend;
+ iv->mult = const1_rtx;
+ iv->delta = outer_step;
+ iv->first_special = inner_mode != outer_mode;
+
+end:
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, " ");
+ dump_iv_info (rtl_dump_file, iv);
+ fprintf (rtl_dump_file, "\n");
+ }
+
+ bivs[regno] = *iv;
+
+ return iv->base != NULL_RTX;
+}
+
+/* Analyses operand OP of INSN and stores the result to *IV. */
+
+static bool
+iv_analyse_op (rtx insn, rtx op, struct rtx_iv *iv)
+{
+ rtx def_insn;
+ unsigned regno;
+ bool inv = CONSTANT_P (op);
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, "Analysing operand ");
+ print_rtl (rtl_dump_file, op);
+ fprintf (rtl_dump_file, " of insn ");
+ print_rtl_single (rtl_dump_file, insn);
+ }
+
+ if (GET_CODE (op) == SUBREG)
+ {
+ if (!subreg_lowpart_p (op))
+ return false;
+
+ if (!iv_analyse_op (insn, SUBREG_REG (op), iv))
+ return false;
+
+ return iv_subreg (iv, GET_MODE (op));
+ }
+
+ if (!inv)
+ {
+ regno = REGNO (op);
+ if (!last_def[regno])
+ inv = true;
+ else if (last_def[regno] == const0_rtx)
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " not simple.\n");
+ return false;
+ }
+ }
+
+ if (inv)
+ {
+ iv_constant (iv, op, VOIDmode);
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, " ");
+ dump_iv_info (rtl_dump_file, iv);
+ fprintf (rtl_dump_file, "\n");
+ }
+ return true;
+ }
+
+ def_insn = iv_get_reaching_def (insn, op);
+ if (def_insn == const0_rtx)
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " not simple.\n");
+ return false;
+ }
+
+ return iv_analyse (def_insn, op, iv);
+}
+
+/* Analyses iv DEF defined in INSN and stores the result to *IV. */
+
+bool
+iv_analyse (rtx insn, rtx def, struct rtx_iv *iv)
+{
+ unsigned uid;
+ rtx set, rhs, mby = NULL_RTX, tmp;
+ rtx op0 = NULL_RTX, op1 = NULL_RTX;
+ struct rtx_iv iv0, iv1;
+ enum machine_mode amode;
+ enum rtx_code code;
+
+ if (insn == const0_rtx)
+ return false;
+
+ if (GET_CODE (def) == SUBREG)
+ {
+ if (!subreg_lowpart_p (def))
+ return false;
+
+ if (!iv_analyse (insn, SUBREG_REG (def), iv))
+ return false;
+
+ return iv_subreg (iv, GET_MODE (def));
+ }
+
+ if (!insn)
+ return iv_analyse_biv (def, iv);
+
+ if (rtl_dump_file)
+ {
+ fprintf (rtl_dump_file, "Analysing def of ");
+ print_rtl (rtl_dump_file, def);
+ fprintf (rtl_dump_file, " in insn ");
+ print_rtl_single (rtl_dump_file, insn);
+ }
+
+ uid = INSN_UID (insn);
+ if (insn_info[uid].iv.analysed)
+ {
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, " already analysed.\n");
+ *iv = insn_info[uid].iv;
+ return iv->base != NULL_RTX;
+ }
+
+ iv->mode = VOIDmode;
+ iv->base = NULL_RTX;
+ iv->step = NULL_RTX;
+
+ set = single_set (insn);
+ rhs = find_reg_equal_equiv_note (insn);
+ if (!rhs)
+ rhs = SET_SRC (set);
+ code = GET_CODE (rhs);
+
+ if (CONSTANT_P (rhs))
+ {
+ op0 = rhs;
+ amode = GET_MODE (def);
+ }
+ else
+ {
+ switch (code)
+ {
+ case SUBREG:
+ if (!subreg_lowpart_p (rhs))
+ goto end;
+ op0 = rhs;
+ break;
+
+ case REG:
+ op0 = rhs;
+ break;
+
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ case NEG:
+ op0 = XEXP (rhs, 0);
+ break;
+
+ case PLUS:
+ case MINUS:
+ op0 = XEXP (rhs, 0);
+ op1 = XEXP (rhs, 1);
+ break;
+
+ case MULT:
+ op0 = XEXP (rhs, 0);
+ mby = XEXP (rhs, 1);
+ if (!CONSTANT_P (mby))
+ {
+ if (!CONSTANT_P (op0))
+ abort ();
+ tmp = op0;
+ op0 = mby;
+ mby = tmp;
+ }
+ break;
+
+ default:
+ abort ();
+ }
+
+ amode = GET_MODE (rhs);
+ }
+
+ if (op0)
+ {
+ if (!iv_analyse_op (insn, op0, &iv0))
+ goto end;
+
+ if (iv0.mode == VOIDmode)
+ {
+ iv0.mode = amode;
+ iv0.extend_mode = amode;
+ }
+ }
+
+ if (op1)
+ {
+ if (!iv_analyse_op (insn, op1, &iv1))
+ goto end;
+
+ if (iv1.mode == VOIDmode)
+ {
+ iv1.mode = amode;
+ iv1.extend_mode = amode;
+ }
+ }
+
+ switch (code)
+ {
+ case SIGN_EXTEND:
+ case ZERO_EXTEND:
+ if (!iv_extend (&iv0, code, amode))
+ goto end;
+ break;
+
+ case NEG:
+ if (!iv_neg (&iv0))
+ goto end;
+ break;
+
+ case PLUS:
+ case MINUS:
+ if (!iv_add (&iv0, &iv1, code))
+ goto end;
+ break;
+
+ case MULT:
+ if (!iv_mult (&iv0, mby))
+ goto end;
+ break;
+
+ default:
+ break;
+ }
+
+ *iv = iv0;
+
+end:
+ iv->analysed = true;
+ insn_info[uid].iv = *iv;
+
+ if (rtl_dump_file)
+ {
+ print_rtl (rtl_dump_file, def);
+ fprintf (rtl_dump_file, " in insn ");
+ print_rtl_single (rtl_dump_file, insn);
+ fprintf (rtl_dump_file, " is ");
+ dump_iv_info (rtl_dump_file, iv);
+ fprintf (rtl_dump_file, "\n");
+ }
+
+ return iv->base != NULL_RTX;
+}
+
+/* Calculates value of IV at ITERATION-th iteration. */
+
+rtx
+get_iv_value (struct rtx_iv *iv, rtx iteration)
+{
+ rtx val;
+
+ /* We would need to generate some if_then_else patterns, and so far
+ it is not needed anywhere. */
+ if (iv->first_special)
+ abort ();
+
+ if (iv->step != const0_rtx && iteration != const0_rtx)
+ val = simplify_gen_binary (PLUS, iv->extend_mode, iv->base,
+ simplify_gen_binary (MULT, iv->extend_mode,
+ iv->step, iteration));
+ else
+ val = iv->base;
+
+ if (iv->extend_mode == iv->mode)
+ return val;
+
+ val = lowpart_subreg (iv->mode, val, iv->extend_mode);
+
+ if (iv->extend == NIL)
+ return val;
+
+ val = simplify_gen_unary (iv->extend, iv->extend_mode, val, iv->mode);
+ val = simplify_gen_binary (PLUS, iv->extend_mode, iv->delta,
+ simplify_gen_binary (MULT, iv->extend_mode,
+ iv->mult, val));
+
+ return val;
+}
+
+/* Free the data for an induction variable analysis. */
+
+void
+iv_analysis_done (void)
+{
+ max_insn_no = 0;
+ max_reg_no = 0;
+ if (insn_info)
+ {
+ free (insn_info);
+ insn_info = NULL;
+ }
+ if (last_def)
+ {
+ free (last_def);
+ last_def = NULL;
+ }
+ if (bivs)
+ {
+ free (bivs);
+ bivs = NULL;
+ }
+}
+
+/* Computes inverse to X modulo (1 << MOD). */
+
+static unsigned HOST_WIDEST_INT
+inverse (unsigned HOST_WIDEST_INT x, int mod)
+{
+ unsigned HOST_WIDEST_INT mask =
+ ((unsigned HOST_WIDEST_INT) 1 << (mod - 1) << 1) - 1;
+ unsigned HOST_WIDEST_INT rslt = 1;
+ int i;
+
+ for (i = 0; i < mod - 1; i++)
+ {
+ rslt = (rslt * x) & mask;
+ x = (x * x) & mask;
+ }
+
+ return rslt;
+}
+
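
The loop above computes x^(2^(mod-1) - 1) modulo 2^mod by repeated squaring; since the odd residues modulo 2^mod form a group of exponent 2^(mod-2), that power is the multiplicative inverse of any odd x. A standalone check of this identity (restating the routine locally; not part of this patch):

  #include <assert.h>
  #include <stdint.h>

  /* Local restatement of the routine above: rslt accumulates
     x^(1+2+...+2^(mod-2)) = x^(2^(mod-1) - 1) modulo 2^mod.  */
  static uint64_t
  inverse (uint64_t x, int mod)
  {
    uint64_t mask = ((uint64_t) 1 << (mod - 1) << 1) - 1;
    uint64_t rslt = 1;
    int i;

    for (i = 0; i < mod - 1; i++)
      {
        rslt = (rslt * x) & mask;
        x = (x * x) & mask;
      }
    return rslt;
  }

  int
  main (void)
  {
    uint64_t x;

    /* For every odd X, inverse (X, 8) really inverts X modulo 2^8.  */
    for (x = 1; x < 256; x += 2)
      assert (((inverse (x, 8) * x) & 0xff) == 1);
    return 0;
  }
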
+/* Tries to estimate the maximum number of iterations. */
+
+static unsigned HOST_WIDEST_INT
+determine_max_iter (struct niter_desc *desc)
+{
+ rtx niter = desc->niter_expr;
+ rtx mmin, mmax, left, right;
+ unsigned HOST_WIDEST_INT nmax, inc;
+
+ if (GET_CODE (niter) == AND
+ && GET_CODE (XEXP (niter, 0)) == CONST_INT)
+ {
+ nmax = INTVAL (XEXP (niter, 0));
+ if (!(nmax & (nmax + 1)))
+ {
+ desc->niter_max = nmax;
+ return nmax;
+ }
+ }
+
+ get_mode_bounds (desc->mode, desc->signed_p, &mmin, &mmax);
+ nmax = INTVAL (mmax) - INTVAL (mmin);
+
+ if (GET_CODE (niter) == UDIV)
+ {
+ if (GET_CODE (XEXP (niter, 1)) != CONST_INT)
+ {
+ desc->niter_max = nmax;
+ return nmax;
+ }
+ inc = INTVAL (XEXP (niter, 1));
+ niter = XEXP (niter, 0);
+ }
+ else
+ inc = 1;
+
+ if (GET_CODE (niter) == PLUS)
+ {
+ left = XEXP (niter, 0);
+ right = XEXP (niter, 1);
+
+ if (GET_CODE (right) == CONST_INT)
+ right = GEN_INT (-INTVAL (right));
+ }
+ else if (GET_CODE (niter) == MINUS)
+ {
+ left = XEXP (niter, 0);
+ right = XEXP (niter, 1);
+ }
+ else
+ {
+ left = niter;
+ right = mmin;
+ }
+
+ if (GET_CODE (left) == CONST_INT)
+ mmax = left;
+ if (GET_CODE (right) == CONST_INT)
+ mmin = right;
+ nmax = INTVAL (mmax) - INTVAL (mmin);
+
+ desc->niter_max = nmax / inc;
+ return nmax / inc;
+}
+
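
The test !(nmax & (nmax + 1)) above accepts exactly the masks of the form 2^k - 1, i.e. a run of low one-bits, which is when (x & nmax) gives a usable upper bound. A tiny standalone check (not part of this patch):

  #include <assert.h>

  /* NMAX & (NMAX + 1) is zero exactly when NMAX looks like 0...01...1,
     i.e. NMAX == 2^k - 1: adding one carries through all the low
     one-bits and the AND clears them.  */
  int
  main (void)
  {
    assert (!(0x0f & (0x0f + 1)));   /* 15 == 2^4 - 1 */
    assert (!(0xff & (0xff + 1)));   /* 255 == 2^8 - 1 */
    assert (!(0x00 & (0x00 + 1)));   /* 0 == 2^0 - 1 */
    assert (0x0e & (0x0e + 1));      /* 14 is not of that form */
    assert (0x10 & (0x10 + 1));      /* 16 is not of that form */
    return 0;
  }
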
+/* Checks whether register *REG is in set ALT. Callback for for_each_rtx. */
+
+static int
+altered_reg_used (rtx *reg, void *alt)
+{
+ if (!REG_P (*reg))
+ return 0;
+
+ return REGNO_REG_SET_P (alt, REGNO (*reg));
+}
+
+/* Marks registers altered by EXPR in set ALT. */
+
+static void
+mark_altered (rtx expr, rtx by ATTRIBUTE_UNUSED, void *alt)
+{
+ if (GET_CODE (expr) == SUBREG)
+ expr = SUBREG_REG (expr);
+ if (!REG_P (expr))
+ return;
+
+ SET_REGNO_REG_SET (alt, REGNO (expr));
+}
+
+/* Checks whether RHS is simple enough to process. */
+
+static bool
+simple_rhs_p (rtx rhs)
+{
+ rtx op0, op1;
+
+ if (CONSTANT_P (rhs)
+ || REG_P (rhs))
+ return true;
+
+ switch (GET_CODE (rhs))
+ {
+ case PLUS:
+ case MINUS:
+ op0 = XEXP (rhs, 0);
+ op1 = XEXP (rhs, 1);
+ /* Allow reg + const sets only. */
+ if (REG_P (op0) && CONSTANT_P (op1))
+ return true;
+ if (REG_P (op1) && CONSTANT_P (op0))
+ return true;
+
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Simplifies *EXPR using assignment in INSN. ALTERED is the set of registers
+ altered so far. */
+
+static void
+simplify_using_assignment (rtx insn, rtx *expr, regset altered)
+{
+ rtx set = single_set (insn);
+ rtx lhs, rhs;
+ bool ret = false;
+
+ if (set)
+ {
+ lhs = SET_DEST (set);
+ if (GET_CODE (lhs) != REG
+ || altered_reg_used (&lhs, altered))
+ ret = true;
+ }
+ else
+ ret = true;
+
+ note_stores (PATTERN (insn), mark_altered, altered);
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ int i;
+
+ /* Kill all call clobbered registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i))
+ SET_REGNO_REG_SET (altered, i);
+ }
+
+ if (ret)
+ return;
+
+ rhs = find_reg_equal_equiv_note (insn);
+ if (!rhs)
+ rhs = SET_SRC (set);
+
+ if (!simple_rhs_p (rhs))
+ return;
+
+ if (for_each_rtx (&rhs, altered_reg_used, altered))
+ return;
+
+ *expr = simplify_replace_rtx (*expr, lhs, rhs);
+}
+
+/* Checks whether A implies B. */
+
+static bool
+implies_p (rtx a, rtx b)
+{
+ rtx op0, op1, r;
+
+ if (GET_CODE (a) == EQ)
+ {
+ op0 = XEXP (a, 0);
+ op1 = XEXP (a, 1);
+
+ if (REG_P (op0))
+ {
+ r = simplify_replace_rtx (b, op0, op1);
+ if (r == const_true_rtx)
+ return true;
+ }
+
+ if (REG_P (op1))
+ {
+ r = simplify_replace_rtx (b, op1, op0);
+ if (r == const_true_rtx)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Canonicalizes COND so that
+
+ (1) Ensure that operands are ordered according to
+ swap_commutative_operands_p.
+ (2) (LE x const) will be replaced with (LT x <const+1>) and similarly
+ for GE, GEU, and LEU. */
+
+rtx
+canon_condition (rtx cond)
+{
+ rtx tem;
+ rtx op0, op1;
+ enum rtx_code code;
+ enum machine_mode mode;
+
+ code = GET_CODE (cond);
+ op0 = XEXP (cond, 0);
+ op1 = XEXP (cond, 1);
+
+ if (swap_commutative_operands_p (op0, op1))
+ {
+ code = swap_condition (code);
+ tem = op0;
+ op0 = op1;
+ op1 = tem;
+ }
+
+ mode = GET_MODE (op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op1);
+ if (mode == VOIDmode)
+ abort ();
+
+ if (GET_CODE (op1) == CONST_INT
+ && GET_MODE_CLASS (mode) != MODE_CC
+ && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+ {
+ HOST_WIDE_INT const_val = INTVAL (op1);
+ unsigned HOST_WIDE_INT uconst_val = const_val;
+ unsigned HOST_WIDE_INT max_val
+ = (unsigned HOST_WIDE_INT) GET_MODE_MASK (mode);
+
+ switch (code)
+ {
+ case LE:
+ if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
+ code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
+ break;
+
+ /* When cross-compiling, const_val might be sign-extended from
+ BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
+ case GE:
+ if ((HOST_WIDE_INT) (const_val & max_val)
+ != (((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
+ code = GT, op1 = gen_int_mode (const_val - 1, mode);
+ break;
+
+ case LEU:
+ if (uconst_val < max_val)
+ code = LTU, op1 = gen_int_mode (uconst_val + 1, mode);
+ break;
+
+ case GEU:
+ if (uconst_val != 0)
+ code = GTU, op1 = gen_int_mode (uconst_val - 1, mode);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (op0 != XEXP (cond, 0)
+ || op1 != XEXP (cond, 1)
+ || code != GET_CODE (cond)
+ || GET_MODE (cond) != SImode)
+ cond = gen_rtx_fmt_ee (code, SImode, op0, op1);
+
+ return cond;
+}
+
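
Rule (2) of canon_condition -- turning non-strict comparisons into strict ones -- only fires when the new constant is still representable, as a minimal standalone sketch for the signed LE case shows (hypothetical helper, not part of this patch):

  #include <assert.h>
  #include <limits.h>

  /* (LE x c) becomes (LT x c+1) unless c is already the maximum signed
     value, in which case c + 1 would overflow and LE must be kept.
     Returns nonzero and sets *NEW_C when the rewrite is valid.  */
  static int
  canon_le_to_lt (int c, int *new_c)
  {
    if (c == INT_MAX)
      return 0;              /* Keep LE; cannot represent c + 1.  */
    *new_c = c + 1;          /* (LE x c) == (LT x c+1).  */
    return 1;
  }

  int
  main (void)
  {
    int nc;

    assert (canon_le_to_lt (9, &nc) && nc == 10);
    assert (!canon_le_to_lt (INT_MAX, &nc));
    return 0;
  }
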
+/* Tries to use the fact that COND holds to simplify EXPR. ALTERED is the
+ set of altered regs. */
+
+void
+simplify_using_condition (rtx cond, rtx *expr, regset altered)
+{
+ rtx rev, reve, exp = *expr;
+
+ if (GET_RTX_CLASS (GET_CODE (*expr)) != '<')
+ return;
+
+ /* If some register gets altered later, we do not really speak about its
+ value at the time of comparison. */
+ if (altered
+ && for_each_rtx (&cond, altered_reg_used, altered))
+ return;
+
+ rev = reversed_condition (cond);
+ reve = reversed_condition (exp);
+
+ cond = canon_condition (cond);
+ exp = canon_condition (exp);
+ if (rev)
+ rev = canon_condition (rev);
+ if (reve)
+ reve = canon_condition (reve);
+
+ if (rtx_equal_p (exp, cond))
+ {
+ *expr = const_true_rtx;
+ return;
+ }
+
+
+ if (rev && rtx_equal_p (exp, rev))
+ {
+ *expr = const0_rtx;
+ return;
+ }
+
+ if (implies_p (cond, exp))
+ {
+ *expr = const_true_rtx;
+ return;
+ }
+
+ if (reve && implies_p (cond, reve))
+ {
+ *expr = const0_rtx;
+ return;
+ }
+
+ /* A proof by contradiction. If *EXPR implies (not cond), *EXPR must
+ be false. */
+ if (rev && implies_p (exp, rev))
+ {
+ *expr = const0_rtx;
+ return;
+ }
+
+ /* Similarly, If (not *EXPR) implies (not cond), *EXPR must be true. */
+ if (rev && reve && implies_p (reve, rev))
+ {
+ *expr = const_true_rtx;
+ return;
+ }
+
+ /* We would like to have some other tests here. TODO. */
+
+ return;
+}
+
+/* Use relationship between A and *B to eventually eliminate *B.
+ OP is the operation we consider. */
+
+static void
+eliminate_implied_condition (enum rtx_code op, rtx a, rtx *b)
+{
+ if (op == AND)
+ {
+ /* If A implies *B, we may replace *B by true. */
+ if (implies_p (a, *b))
+ *b = const_true_rtx;
+ }
+ else if (op == IOR)
+ {
+ /* If *B implies A, we may replace *B by false. */
+ if (implies_p (*b, a))
+ *b = const0_rtx;
+ }
+ else
+ abort ();
+}
+
+/* Eliminates the conditions in TAIL that are implied by HEAD. OP is the
+ operation we consider. */
+
+static void
+eliminate_implied_conditions (enum rtx_code op, rtx *head, rtx tail)
+{
+ rtx elt;
+
+ for (elt = tail; elt; elt = XEXP (elt, 1))
+ eliminate_implied_condition (op, *head, &XEXP (elt, 0));
+ for (elt = tail; elt; elt = XEXP (elt, 1))
+ eliminate_implied_condition (op, XEXP (elt, 0), head);
+}
+
+/* Simplifies *EXPR using initial values at the start of the LOOP. If *EXPR
+ is a list, its elements are assumed to be combined using OP. */
+
+static void
+simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr)
+{
+ rtx head, tail, insn;
+ rtx neutral, aggr;
+ regset altered;
+ regset_head altered_head;
+ edge e;
+
+ if (!*expr)
+ return;
+
+ if (CONSTANT_P (*expr))
+ return;
+
+ if (GET_CODE (*expr) == EXPR_LIST)
+ {
+ head = XEXP (*expr, 0);
+ tail = XEXP (*expr, 1);
+
+ eliminate_implied_conditions (op, &head, tail);
+
+ if (op == AND)
+ {
+ neutral = const_true_rtx;
+ aggr = const0_rtx;
+ }
+ else if (op == IOR)
+ {
+ neutral = const0_rtx;
+ aggr = const_true_rtx;
+ }
+ else
+ abort ();
+
+ simplify_using_initial_values (loop, NIL, &head);
+ if (head == aggr)
+ {
+ XEXP (*expr, 0) = aggr;
+ XEXP (*expr, 1) = NULL_RTX;
+ return;
+ }
+ else if (head == neutral)
+ {
+ *expr = tail;
+ simplify_using_initial_values (loop, op, expr);
+ return;
+ }
+ simplify_using_initial_values (loop, op, &tail);
+
+ if (tail && XEXP (tail, 0) == aggr)
+ {
+ *expr = tail;
+ return;
+ }
+
+ XEXP (*expr, 0) = head;
+ XEXP (*expr, 1) = tail;
+ return;
+ }
+
+ if (op != NIL)
+ abort ();
+
+ e = loop_preheader_edge (loop);
+ if (e->src == ENTRY_BLOCK_PTR)
+ return;
+
+ altered = INITIALIZE_REG_SET (altered_head);
+
+ while (1)
+ {
+ insn = BB_END (e->src);
+ if (any_condjump_p (insn))
+ {
+ /* FIXME -- slightly wrong -- what if compared register
+ gets altered between start of the condition and insn? */
+ rtx cond = get_condition (BB_END (e->src), NULL, false);
+
+ if (cond && (e->flags & EDGE_FALLTHRU))
+ cond = reversed_condition (cond);
+ if (cond)
+ {
+ simplify_using_condition (cond, expr, altered);
+ if (CONSTANT_P (*expr))
+ {
+ FREE_REG_SET (altered);
+ return;
+ }
+ }
+ }
+
+ FOR_BB_INSNS_REVERSE (e->src, insn)
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ simplify_using_assignment (insn, expr, altered);
+ if (CONSTANT_P (*expr))
+ {
+ FREE_REG_SET (altered);
+ return;
+ }
+ }
+
+ e = e->src->pred;
+ if (e->pred_next
+ || e->src == ENTRY_BLOCK_PTR)
+ break;
+ }
+
+ FREE_REG_SET (altered);
+}
+
+/* Transforms invariant IV into MODE. Adds assumptions to DESC based on
+ the fact that IV occurs as the left operand of comparison COND whose
+ signedness is SIGNED_P. */
+
+static void
+shorten_into_mode (struct rtx_iv *iv, enum machine_mode mode,
+ enum rtx_code cond, bool signed_p, struct niter_desc *desc)
+{
+ rtx mmin, mmax, cond_over, cond_under;
+
+ get_mode_bounds (mode, signed_p, &mmin, &mmax);
+ cond_under = simplify_gen_relational (LT, SImode, iv->extend_mode,
+ iv->base, mmin);
+ cond_over = simplify_gen_relational (GT, SImode, iv->extend_mode,
+ iv->base, mmax);
+
+ switch (cond)
+ {
+ case LE:
+ case LT:
+ case LEU:
+ case LTU:
+ if (cond_under != const0_rtx)
+ desc->infinite =
+ alloc_EXPR_LIST (0, cond_under, desc->infinite);
+ if (cond_over != const0_rtx)
+ desc->noloop_assumptions =
+ alloc_EXPR_LIST (0, cond_over, desc->noloop_assumptions);
+ break;
+
+ case GE:
+ case GT:
+ case GEU:
+ case GTU:
+ if (cond_over != const0_rtx)
+ desc->infinite =
+ alloc_EXPR_LIST (0, cond_over, desc->infinite);
+ if (cond_under != const0_rtx)
+ desc->noloop_assumptions =
+ alloc_EXPR_LIST (0, cond_under, desc->noloop_assumptions);
+ break;
+
+ case NE:
+ if (cond_over != const0_rtx)
+ desc->infinite =
+ alloc_EXPR_LIST (0, cond_over, desc->infinite);
+ if (cond_under != const0_rtx)
+ desc->infinite =
+ alloc_EXPR_LIST (0, cond_under, desc->infinite);
+ break;
+
+ default:
+ abort ();
+ }
+
+ iv->mode = mode;
+ iv->extend = signed_p ? SIGN_EXTEND : ZERO_EXTEND;
+}
+
+/* Transforms IV0 and IV1, compared by COND, so that they are both compared
+ as subregs of the same mode if possible (sometimes it is necessary to add
+ some assumptions to DESC). */
+
+static bool
+canonicalize_iv_subregs (struct rtx_iv *iv0, struct rtx_iv *iv1,
+ enum rtx_code cond, struct niter_desc *desc)
+{
+ enum machine_mode comp_mode;
+ bool signed_p;
+
+ /* If the ivs behave specially in the first iteration, or are
+ added/multiplied after extending, we ignore them. */
+ if (iv0->first_special || iv0->mult != const1_rtx || iv0->delta != const0_rtx)
+ return false;
+ if (iv1->first_special || iv1->mult != const1_rtx || iv1->delta != const0_rtx)
+ return false;
+
+ /* If there is some extend, it must match signedness of the comparison. */
+ switch (cond)
+ {
+ case LE:
+ case LT:
+ if (iv0->extend == ZERO_EXTEND
+ || iv1->extend == ZERO_EXTEND)
+ return false;
+ signed_p = true;
+ break;
+
+ case LEU:
+ case LTU:
+ if (iv0->extend == SIGN_EXTEND
+ || iv1->extend == SIGN_EXTEND)
+ return false;
+ signed_p = false;
+ break;
+
+ case NE:
+ if (iv0->extend != NIL
+ && iv1->extend != NIL
+ && iv0->extend != iv1->extend)
+ return false;
+
+ signed_p = false;
+ if (iv0->extend != NIL)
+ signed_p = iv0->extend == SIGN_EXTEND;
+ if (iv1->extend != NIL)
+ signed_p = iv1->extend == SIGN_EXTEND;
+ break;
+
+ default:
+ abort ();
+ }
+
+ /* Values of both variables should be computed in the same mode. These
+ might indeed be different, if we have a comparison like
+
+ (compare (subreg:SI (iv0)) (subreg:SI (iv1)))
+
+ and iv0 and iv1 are both ivs iterating in SI mode, but calculated
+ in different modes. This does not seem impossible to handle, but
+ it hardly ever occurs in practice.
+
+ The only exception is the case when one of the operands is invariant.
+ For example pentium 3 generates comparisons like
+ (lt (subreg:HI (reg:SI)) 100). Here we assign HImode to 100, but we
+ definitely do not want this to prevent the optimization. */
+ comp_mode = iv0->extend_mode;
+ if (GET_MODE_BITSIZE (comp_mode) < GET_MODE_BITSIZE (iv1->extend_mode))
+ comp_mode = iv1->extend_mode;
+
+ if (iv0->extend_mode != comp_mode)
+ {
+ if (iv0->mode != iv0->extend_mode
+ || iv0->step != const0_rtx)
+ return false;
+
+ iv0->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND,
+ comp_mode, iv0->base, iv0->mode);
+ iv0->extend_mode = comp_mode;
+ }
+
+ if (iv1->extend_mode != comp_mode)
+ {
+ if (iv1->mode != iv1->extend_mode
+ || iv1->step != const0_rtx)
+ return false;
+
+ iv1->base = simplify_gen_unary (signed_p ? SIGN_EXTEND : ZERO_EXTEND,
+ comp_mode, iv1->base, iv1->mode);
+ iv1->extend_mode = comp_mode;
+ }
+
+ /* Check that both ivs belong to a range of a single mode. If one of the
+ operands is an invariant, we may need to shorten it into the common
+ mode. */
+ if (iv0->mode == iv0->extend_mode
+ && iv0->step == const0_rtx
+ && iv0->mode != iv1->mode)
+ shorten_into_mode (iv0, iv1->mode, cond, signed_p, desc);
+
+ if (iv1->mode == iv1->extend_mode
+ && iv1->step == const0_rtx
+ && iv0->mode != iv1->mode)
+ shorten_into_mode (iv1, iv0->mode, swap_condition (cond), signed_p, desc);
+
+ if (iv0->mode != iv1->mode)
+ return false;
+
+ desc->mode = iv0->mode;
+ desc->signed_p = signed_p;
+
+ return true;
+}
+
+/* Computes the number of iterations of the CONDITION in INSN inside LOOP,
+ and stores the result into DESC. Very similar to
+ determine_number_of_iterations (basically its rtl version), complicated
+ by things like subregs. */
+
+void
+iv_number_of_iterations (struct loop *loop, rtx insn, rtx condition,
+ struct niter_desc *desc)
+{
+ rtx op0, op1, delta, step, bound, may_xform, def_insn, tmp, tmp0, tmp1;
+ struct rtx_iv iv0, iv1, tmp_iv;
+ rtx assumption;
+ enum rtx_code cond;
+ enum machine_mode mode, comp_mode;
+ rtx mmin, mmax;
+ unsigned HOST_WIDEST_INT s, size, d;
+ HOST_WIDEST_INT up, down, inc;
+ int was_sharp = false;
+
+ /* The meaning of these assumptions is this:
+ if !assumptions
+ then the rest of the information does not have to be valid
+ if noloop_assumptions then the loop does not roll
+ if infinite then this exit is never used */
+
+ desc->assumptions = NULL_RTX;
+ desc->noloop_assumptions = NULL_RTX;
+ desc->infinite = NULL_RTX;
+ desc->simple_p = true;
+
+ desc->const_iter = false;
+ desc->niter_expr = NULL_RTX;
+ desc->niter_max = 0;
+
+ cond = GET_CODE (condition);
+ if (GET_RTX_CLASS (cond) != '<')
+ abort ();
+
+ mode = GET_MODE (XEXP (condition, 0));
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (condition, 1));
+ /* The constant comparisons should be folded. */
+ if (mode == VOIDmode)
+ abort ();
+
+ /* We only handle integers or pointers. */
+ if (GET_MODE_CLASS (mode) != MODE_INT
+ && GET_MODE_CLASS (mode) != MODE_PARTIAL_INT)
+ goto fail;
+
+ op0 = XEXP (condition, 0);
+ def_insn = iv_get_reaching_def (insn, op0);
+ if (!iv_analyse (def_insn, op0, &iv0))
+ goto fail;
+ if (iv0.extend_mode == VOIDmode)
+ iv0.mode = iv0.extend_mode = mode;
+
+ op1 = XEXP (condition, 1);
+ def_insn = iv_get_reaching_def (insn, op1);
+ if (!iv_analyse (def_insn, op1, &iv1))
+ goto fail;
+ if (iv1.extend_mode == VOIDmode)
+ iv1.mode = iv1.extend_mode = mode;
+
+ if (GET_MODE_BITSIZE (iv0.extend_mode) > HOST_BITS_PER_WIDE_INT
+ || GET_MODE_BITSIZE (iv1.extend_mode) > HOST_BITS_PER_WIDE_INT)
+ goto fail;
+
+ /* Check condition and normalize it. */
+
+ switch (cond)
+ {
+ case GE:
+ case GT:
+ case GEU:
+ case GTU:
+ tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
+ cond = swap_condition (cond);
+ break;
+ case NE:
+ case LE:
+ case LEU:
+ case LT:
+ case LTU:
+ break;
+ default:
+ goto fail;
+ }
+
+ /* Handle extends. This is relatively nontrivial, so we only try in some
+ easy cases, when we can canonicalize the ivs (possibly by adding some
+ assumptions) to the shape subreg (base + i * step). This function also fills
+ in desc->mode and desc->signed_p. */
+
+ if (!canonicalize_iv_subregs (&iv0, &iv1, cond, desc))
+ goto fail;
+
+ comp_mode = iv0.extend_mode;
+ mode = iv0.mode;
+ size = GET_MODE_BITSIZE (mode);
+ get_mode_bounds (mode, (cond == LE || cond == LT), &mmin, &mmax);
+
+ if (GET_CODE (iv0.step) != CONST_INT || GET_CODE (iv1.step) != CONST_INT)
+ goto fail;
+
+ /* We can take care of the case of two induction variables chasing each other
+ if the test is NE. I have never seen a loop using it, but still it is
+ cool. */
+ if (iv0.step != const0_rtx && iv1.step != const0_rtx)
+ {
+ if (cond != NE)
+ goto fail;
+
+ iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step);
+ iv1.step = const0_rtx;
+ }
+
+ /* This is either an infinite loop or one that ends immediately, depending
+ on the initial values. Unswitching should remove this kind of condition. */
+ if (iv0.step == const0_rtx && iv1.step == const0_rtx)
+ goto fail;
+
+ /* Ignore loops of while (i-- < 10) type. */
+ if (cond != NE
+ && (INTVAL (iv0.step) < 0 || INTVAL (iv1.step) > 0))
+ goto fail;
+
+ /* Some more condition normalization. We must record some assumptions
+ due to overflows. */
+ switch (cond)
+ {
+ case LT:
+ case LTU:
+ /* We want to take care only of non-sharp relationals; this is easy,
+ as in the cases where the overflow would make the transformation unsafe,
+ the loop does not roll. Seemingly it would make more sense to take
+ care of sharp relationals instead, as NE is more similar to them, but
+ the problem is that there the transformation would be more difficult
+ due to possibly infinite loops. */
+ if (iv0.step == const0_rtx)
+ {
+ tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmax);
+ if (assumption == const_true_rtx)
+ goto zero_iter;
+ iv0.base = simplify_gen_binary (PLUS, comp_mode,
+ iv0.base, const1_rtx);
+ }
+ else
+ {
+ tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ assumption = simplify_gen_relational (EQ, SImode, mode, tmp, mmin);
+ if (assumption == const_true_rtx)
+ goto zero_iter;
+ iv1.base = simplify_gen_binary (PLUS, comp_mode,
+ iv1.base, constm1_rtx);
+ }
+
+ if (assumption != const0_rtx)
+ desc->noloop_assumptions =
+ alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ cond = (cond == LT) ? LE : LEU;
+
+ /* It will be useful to be able to tell the difference once more in
+ the LE -> NE reduction. */
+ was_sharp = true;
+ break;
+ default: ;
+ }
+
+ /* Take care of trivially infinite loops. */
+ if (cond != NE)
+ {
+ if (iv0.step == const0_rtx)
+ {
+ tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ if (rtx_equal_p (tmp, mmin))
+ {
+ desc->infinite =
+ alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ return;
+ }
+ }
+ else
+ {
+ tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ if (rtx_equal_p (tmp, mmax))
+ {
+ desc->infinite =
+ alloc_EXPR_LIST (0, const_true_rtx, NULL_RTX);
+ return;
+ }
+ }
+ }
+
+ /* If we can, we want to take care of NE conditions instead of size
+ comparisons, as they are much more friendly (most importantly this
+ takes care of the special handling of loops with step 1). We can do it
+ if we first check that the upper bound is greater than or equal to the
+ lower bound, that their difference is constant c modulo the step, and
+ that there is no overflow. */
+ if (cond != NE)
+ {
+ if (iv0.step == const0_rtx)
+ step = simplify_gen_unary (NEG, comp_mode, iv1.step, comp_mode);
+ else
+ step = iv0.step;
+ delta = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base);
+ delta = lowpart_subreg (mode, delta, comp_mode);
+ delta = simplify_gen_binary (UMOD, mode, delta, step);
+ may_xform = const0_rtx;
+
+ if (GET_CODE (delta) == CONST_INT)
+ {
+ if (was_sharp && INTVAL (delta) == INTVAL (step) - 1)
+ {
+ /* A special case. We have transformed a condition of the shape
+ for (i = 0; i < 4; i += 4)
+ into
+ for (i = 0; i <= 3; i += 4)
+ Obviously, if the test for overflow during that transformation
+ passed, we cannot overflow here. Most importantly, any
+ loop with a sharp end condition and step 1 falls into this
+ category, so handling this case specially is definitely
+ worth the trouble. */
+ may_xform = const_true_rtx;
+ }
+ else if (iv0.step == const0_rtx)
+ {
+ bound = simplify_gen_binary (PLUS, comp_mode, mmin, step);
+ bound = simplify_gen_binary (MINUS, comp_mode, bound, delta);
+ bound = lowpart_subreg (mode, bound, comp_mode);
+ tmp = lowpart_subreg (mode, iv0.base, comp_mode);
+ may_xform = simplify_gen_relational (cond, SImode, mode,
+ bound, tmp);
+ }
+ else
+ {
+ bound = simplify_gen_binary (MINUS, comp_mode, mmax, step);
+ bound = simplify_gen_binary (PLUS, comp_mode, bound, delta);
+ bound = lowpart_subreg (mode, bound, comp_mode);
+ tmp = lowpart_subreg (mode, iv1.base, comp_mode);
+ may_xform = simplify_gen_relational (cond, SImode, mode,
+ tmp, bound);
+ }
+ }
+
+ if (may_xform != const0_rtx)
+ {
+ /* We always perform the transformation, provided that it is not
+ completely senseless. This is OK, as we would need this assumption
+ to determine the number of iterations anyway. */
+ if (may_xform != const_true_rtx)
+ desc->assumptions = alloc_EXPR_LIST (0, may_xform,
+ desc->assumptions);
+
+ /* We are going to lose some information about the upper bound on the
+ number of iterations in this step, so record the information
+ here. */
+ inc = INTVAL (iv0.step) - INTVAL (iv1.step);
+ if (GET_CODE (iv1.base) == CONST_INT)
+ up = INTVAL (iv1.base);
+ else
+ up = INTVAL (mmax) - inc;
+ down = INTVAL (GET_CODE (iv0.base) == CONST_INT ? iv0.base : mmin);
+ desc->niter_max = (up - down) / inc + 1;
+
+ if (iv0.step == const0_rtx)
+ {
+ iv0.base = simplify_gen_binary (PLUS, comp_mode, iv0.base, delta);
+ iv0.base = simplify_gen_binary (MINUS, comp_mode, iv0.base, step);
+ }
+ else
+ {
+ iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, delta);
+ iv1.base = simplify_gen_binary (PLUS, comp_mode, iv1.base, step);
+ }
+
+ tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+ assumption = simplify_gen_relational (reverse_condition (cond),
+ SImode, mode, tmp0, tmp1);
+ if (assumption == const_true_rtx)
+ goto zero_iter;
+ else if (assumption != const0_rtx)
+ desc->noloop_assumptions =
+ alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ cond = NE;
+ }
+ }
+
+ /* Count the number of iterations. */
+ if (cond == NE)
+ {
+ /* Everything we do here is just arithmetic modulo the size of the mode.
+ This enables us to do more involved computations of the number of
+ iterations than in other cases. First transform the condition into the
+ shape s * i <> c, with s positive. */
+ iv1.base = simplify_gen_binary (MINUS, comp_mode, iv1.base, iv0.base);
+ iv0.base = const0_rtx;
+ iv0.step = simplify_gen_binary (MINUS, comp_mode, iv0.step, iv1.step);
+ iv1.step = const0_rtx;
+ if (INTVAL (iv0.step) < 0)
+ {
+ iv0.step = simplify_gen_unary (NEG, comp_mode, iv0.step, mode);
+ iv1.base = simplify_gen_unary (NEG, comp_mode, iv1.base, mode);
+ }
+ iv0.step = lowpart_subreg (mode, iv0.step, comp_mode);
+
+ /* Let gcd (s, size of mode) = d. If d does not divide c, the loop
+ is infinite. Otherwise, the number of iterations is
+ (inverse(s/d) * (c/d)) mod (size of mode/d). */
+ s = INTVAL (iv0.step); d = 1;
+ while (s % 2 != 1)
+ {
+ s /= 2;
+ d *= 2;
+ size--;
+ }
+ bound = GEN_INT (((unsigned HOST_WIDEST_INT) 1 << (size - 1) << 1) - 1);
+
+ tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+ tmp = simplify_gen_binary (UMOD, mode, tmp1, GEN_INT (d));
+ assumption = simplify_gen_relational (NE, SImode, mode, tmp, const0_rtx);
+ desc->infinite = alloc_EXPR_LIST (0, assumption, desc->infinite);
+
+ tmp = simplify_gen_binary (UDIV, mode, tmp1, GEN_INT (d));
+ tmp = simplify_gen_binary (MULT, mode,
+ tmp, GEN_INT (inverse (s, size)));
+ desc->niter_expr = simplify_gen_binary (AND, mode, tmp, bound);
+ }
+ else
+ {
+ if (iv1.step == const0_rtx)
+ /* Condition in the shape a + s * i <= b.
+ We must know that b + s does not overflow and that a <= b + s; then we
+ can compute the number of iterations as (b + s - a) / s. (It might
+ seem that we could in fact be more clever about testing the b + s
+ overflow condition using some information about b - a mod s,
+ but it was already taken into account during the LE -> NE transform). */
+ {
+ step = iv0.step;
+ tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+
+ bound = simplify_gen_binary (MINUS, mode, mmax, step);
+ assumption = simplify_gen_relational (cond, SImode, mode,
+ tmp1, bound);
+ desc->assumptions =
+ alloc_EXPR_LIST (0, assumption, desc->assumptions);
+
+ tmp = simplify_gen_binary (PLUS, comp_mode, iv1.base, iv0.step);
+ tmp = lowpart_subreg (mode, tmp, comp_mode);
+ assumption = simplify_gen_relational (reverse_condition (cond),
+ SImode, mode, tmp0, tmp);
+
+ delta = simplify_gen_binary (PLUS, mode, tmp1, step);
+ delta = simplify_gen_binary (MINUS, mode, delta, tmp0);
+ }
+ else
+ {
+ /* Condition in the shape a <= b - s * i.
+ We must know that a - s does not overflow and that a - s <= b; then
+ we can again compute the number of iterations as (b - (a - s)) / s. */
+ step = simplify_gen_unary (NEG, mode, iv1.step, mode);
+ tmp0 = lowpart_subreg (mode, iv0.base, comp_mode);
+ tmp1 = lowpart_subreg (mode, iv1.base, comp_mode);
+
+ bound = simplify_gen_binary (MINUS, mode, mmin, step);
+ assumption = simplify_gen_relational (cond, SImode, mode,
+ bound, tmp0);
+ desc->assumptions =
+ alloc_EXPR_LIST (0, assumption, desc->assumptions);
+
+ tmp = simplify_gen_binary (PLUS, comp_mode, iv0.base, iv1.step);
+ tmp = lowpart_subreg (mode, tmp, comp_mode);
+ assumption = simplify_gen_relational (reverse_condition (cond),
+ SImode, mode,
+ tmp, tmp1);
+ delta = simplify_gen_binary (MINUS, mode, tmp0, step);
+ delta = simplify_gen_binary (MINUS, mode, tmp1, delta);
+ }
+ if (assumption == const_true_rtx)
+ goto zero_iter;
+ else if (assumption != const0_rtx)
+ desc->noloop_assumptions =
+ alloc_EXPR_LIST (0, assumption, desc->noloop_assumptions);
+ delta = simplify_gen_binary (UDIV, mode, delta, step);
+ desc->niter_expr = delta;
+ }
+
+ simplify_using_initial_values (loop, AND, &desc->assumptions);
+ if (desc->assumptions
+ && XEXP (desc->assumptions, 0) == const0_rtx)
+ goto fail;
+ simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+ simplify_using_initial_values (loop, IOR, &desc->infinite);
+ simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+
+ /* Rerun the simplification. Consider code (created by copying loop headers)
+
+ i = 0;
+
+ if (0 < n)
+ {
+ do
+ {
+ i++;
+ } while (i < n);
+ }
+
+ The first pass determines that i = 0, the second pass uses it to eliminate
+ the noloop assumption. */
+
+ simplify_using_initial_values (loop, AND, &desc->assumptions);
+ if (desc->assumptions
+ && XEXP (desc->assumptions, 0) == const0_rtx)
+ goto fail;
+ simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
+ simplify_using_initial_values (loop, IOR, &desc->infinite);
+ simplify_using_initial_values (loop, NIL, &desc->niter_expr);
+
+ if (GET_CODE (desc->niter_expr) == CONST_INT)
+ {
+ unsigned HOST_WIDEST_INT val = INTVAL (desc->niter_expr);
+
+ desc->const_iter = true;
+ desc->niter_max = desc->niter = val & GET_MODE_MASK (desc->mode);
+ }
+ else if (!desc->niter_max)
+ desc->niter_max = determine_max_iter (desc);
+
+ return;
+
+fail:
+ desc->simple_p = false;
+ return;
+
+zero_iter:
+ desc->const_iter = true;
+ desc->niter = 0;
+ desc->niter_max = 0;
+ desc->niter_expr = const0_rtx;
+ return;
+}
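The NE branch above is worth seeing with numbers. A standalone sketch of the same gcd-and-modular-inverse computation in plain C (illustrative only; inverse32 is a hypothetical stand-in for the inverse() helper the code calls):

#include <stdint.h>
#include <stdio.h>

/* Inverse of the odd number S modulo 2^SIZE by Newton iteration; each
   step doubles the number of correct low-order bits.  */
static uint32_t
inverse32 (uint32_t s, int size)
{
  uint32_t x = s; /* s * s == 1 (mod 8), so three bits are correct.  */
  for (int bits = 3; bits < size; bits *= 2)
    x *= 2 - s * x;
  return size < 32 ? x & (((uint32_t) 1 << size) - 1) : x;
}

int
main (void)
{
  /* for (i = 0; s * i != c; i++), counted in 32-bit modular arithmetic.  */
  uint32_t s = 6, c = 18;
  int size = 32, d = 1;

  while (s % 2 == 0)
    {
      s /= 2;
      d *= 2;
      size--;
    }
  if (c % d != 0)
    {
      puts ("infinite"); /* d must divide c, or the loop never exits.  */
      return 0;
    }
  uint32_t mask = size < 32 ? ((uint32_t) 1 << size) - 1 : ~(uint32_t) 0;
  printf ("%u\n", (inverse32 (s, size) * (c / d)) & mask); /* prints 3 */
  return 0;
}

Here s == 6 strips to s == 3 with d == 2 and a 31-bit modulus; the inverse of 3 times 18 / 2 == 9 gives 3, matching the three iterations of the literal loop.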
+
+/* Checks whether E is a simple exit from LOOP and stores its description
+ into DESC. TODO: should replace cfgloopanal.c:simple_loop_exit_p. */
+
+static void
+check_simple_exit (struct loop *loop, edge e, struct niter_desc *desc)
+{
+ basic_block exit_bb;
+ rtx condition, at;
+ edge ei;
+
+ exit_bb = e->src;
+ desc->simple_p = false;
+
+ /* It must belong directly to the loop. */
+ if (exit_bb->loop_father != loop)
+ return;
+
+ /* It must be tested (at least) once during any iteration. */
+ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit_bb))
+ return;
+
+ /* It must end in a simple conditional jump. */
+ if (!any_condjump_p (BB_END (exit_bb)))
+ return;
+
+ ei = exit_bb->succ;
+ if (ei == e)
+ ei = ei->succ_next;
+
+ desc->out_edge = e;
+ desc->in_edge = ei;
+
+ /* Test whether the condition is suitable. */
+ if (!(condition = get_condition (BB_END (ei->src), &at, false)))
+ return;
+
+ if (ei->flags & EDGE_FALLTHRU)
+ {
+ condition = reversed_condition (condition);
+ if (!condition)
+ return;
+ }
+
+ /* Check that we are able to determine the number of iterations and fill
+ in the information about it. */
+ iv_number_of_iterations (loop, at, condition, desc);
+}
+
+/* Finds a simple exit of LOOP and stores its description into DESC.
+ TODO: should replace cfgloopanal.c:simple_loop_p. */
+
+void
+find_simple_exit (struct loop *loop, struct niter_desc *desc)
+{
+ unsigned i;
+ basic_block *body;
+ edge e;
+ struct niter_desc act;
+ bool any = false;
+
+ desc->simple_p = false;
+ body = get_loop_body (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ for (e = body[i]->succ; e; e = e->succ_next)
+ {
+ if (flow_bb_inside_loop_p (loop, e->dest))
+ continue;
+
+ check_simple_exit (loop, e, &act);
+ if (!act.simple_p)
+ continue;
+
+ /* Prefer constant iterations; the fewer the better. */
+ if (!any)
+ any = true;
+ else if (!act.const_iter
+ || (desc->const_iter && act.niter >= desc->niter))
+ continue;
+ *desc = act;
+ }
+ }
+
+ if (rtl_dump_file)
+ {
+ if (desc->simple_p)
+ {
+ fprintf (rtl_dump_file, "Loop %d is simple:\n", loop->num);
+ fprintf (rtl_dump_file, " simple exit %d -> %d\n",
+ desc->out_edge->src->index,
+ desc->out_edge->dest->index);
+ if (desc->assumptions)
+ {
+ fprintf (rtl_dump_file, " assumptions: ");
+ print_rtl (rtl_dump_file, desc->assumptions);
+ fprintf (rtl_dump_file, "\n");
+ }
+ if (desc->noloop_assumptions)
+ {
+ fprintf (rtl_dump_file, " does not roll if: ");
+ print_rtl (rtl_dump_file, desc->noloop_assumptions);
+ fprintf (rtl_dump_file, "\n");
+ }
+ if (desc->infinite)
+ {
+ fprintf (rtl_dump_file, " infinite if: ");
+ print_rtl (rtl_dump_file, desc->infinite);
+ fprintf (rtl_dump_file, "\n");
+ }
+
+ fprintf (rtl_dump_file, " number of iterations: ");
+ print_rtl (rtl_dump_file, desc->niter_expr);
+ fprintf (rtl_dump_file, "\n");
+
+ fprintf (rtl_dump_file, " upper bound: ");
+ fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter_max);
+ fprintf (rtl_dump_file, "\n");
+ }
+ else
+ fprintf (rtl_dump_file, "Loop %d is not simple.\n", loop->num);
+ }
+
+ free (body);
+}
+
+/* Creates a simple loop description of LOOP if it was not computed
+ already. */
+
+struct niter_desc *
+get_simple_loop_desc (struct loop *loop)
+{
+ struct niter_desc *desc = simple_loop_desc (loop);
+
+ if (desc)
+ return desc;
+
+ desc = xmalloc (sizeof (struct niter_desc));
+ iv_analysis_loop_init (loop);
+ find_simple_exit (loop, desc);
+ loop->aux = desc;
+
+ return desc;
+}
+
+/* Releases simple loop description for LOOP. */
+
+void
+free_simple_loop_desc (struct loop *loop)
+{
+ struct niter_desc *desc = simple_loop_desc (loop);
+
+ if (!desc)
+ return;
+
+ free (desc);
+ loop->aux = NULL;
+}
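Taken together, get_simple_loop_desc and free_simple_loop_desc give the lazy caching pattern that the loop-unroll.c changes below rely on; schematically (a fragment using the GCC-internal types above, mirroring the call sites in this patch):

struct niter_desc *desc = get_simple_loop_desc (loop); /* cached in loop->aux */

if (desc->simple_p && !desc->assumptions && desc->const_iter)
  {
    /* desc->niter is a compile-time iteration count valid without
       further runtime checks.  */
  }

/* Once the loop is transformed beyond what the description tracks: */
free_simple_loop_desc (loop);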
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 6c796af..b093a7d 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -85,7 +85,7 @@ void
unroll_and_peel_loops (struct loops *loops, int flags)
{
struct loop *loop, *next;
- int check;
+ bool check;
/* First perform complete loop peeling (it is almost surely a win,
and affects parameters for further decision a lot). */
@@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
else
next = loop->outer;
- check = 1;
+ check = true;
/* And perform the appropriate transformations. */
switch (loop->lpt_decision.decision)
{
@@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags)
unroll_loop_stupid (loops, loop);
break;
case LPT_NONE:
- check = 0;
+ check = false;
break;
default:
abort ();
@@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags)
}
loop = next;
}
+
+ iv_analysis_done ();
+}
+
+/* Check whether the exit of LOOP is at the end of the loop body. */
+
+static bool
+loop_exit_at_end_p (struct loop *loop)
+{
+ struct niter_desc *desc = get_simple_loop_desc (loop);
+ rtx insn;
+
+ if (desc->in_edge->dest != loop->latch)
+ return false;
+
+ /* Check that the latch is empty. */
+ FOR_BB_INSNS (loop->latch, insn)
+ {
+ if (INSN_P (insn))
+ return false;
+ }
+
+ return true;
}
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */
@@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags)
next = loop->outer;
loop->lpt_decision.decision = LPT_NONE;
- loop->has_desc = 0;
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
+ fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n",
loop->num);
loop->ninsns = num_loop_insns (loop);
@@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
loop->lpt_decision.decision = LPT_NONE;
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
+ fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
/* Do not peel cold areas. */
if (!maybe_hot_bb_p (loop->header))
@@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags)
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
+ struct niter_desc *desc;
+
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
+ fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n");
/* Is the loop small enough? */
if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
@@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
}
/* Check for simple loops. */
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
+ desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
- if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
+ if (!desc->simple_p
+ || desc->assumptions
+ || !desc->const_iter
+ || desc->niter != 0)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
@@ -303,9 +329,10 @@ static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
unsigned npeel;
+ struct niter_desc *desc;
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering peeling completely\n");
+ fprintf (rtl_dump_file, "\n;; Considering peeling completely\n");
/* Skip non-innermost loops. */
if (loop->inner)
@@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
}
/* Check for simple loops. */
- if (!loop->has_desc)
- {
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
- }
+ desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
- if (!loop->simple || !loop->desc.const_iter)
+ if (!desc->simple_p
+ || desc->assumptions
+ || !desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
return;
}
- if (loop->desc.niter > npeel - 1)
+ if (desc->niter > npeel - 1)
{
if (rtl_dump_file)
{
fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
- fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
+ fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
}
return;
@@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned HOST_WIDE_INT npeel;
unsigned n_remove_edges, i;
- edge *remove_edges;
- struct loop_desc *desc = &loop->desc;
+ edge *remove_edges, ei;
+ struct niter_desc *desc = get_simple_loop_desc (loop);
npeel = desc->niter;
@@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_ones (wont_exit);
RESET_BIT (wont_exit, 0);
- if (desc->may_be_zero)
+ if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
remove_edges = xcalloc (npeel, sizeof (edge));
@@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
free (remove_edges);
}
+ ei = desc->in_edge;
+ free_simple_loop_desc (loop);
+
/* Now remove the unreachable part of the last iteration and cancel
the loop. */
- remove_path (loops, desc->in_edge);
+ remove_path (loops, ei);
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}
/* Decide whether to unroll LOOP iterating constant number of times and how much. */
+
static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
- unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
+ unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
+ struct niter_desc *desc;
if (!(flags & UAP_UNROLL))
{
@@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
+ fprintf (rtl_dump_file,
+ "\n;; Considering unrolling loop with constant number of iterations\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
@@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
/* Check for simple loops. */
- if (!loop->has_desc)
- {
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
- }
+ desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
- if (!loop->simple || !loop->desc.const_iter)
+ if (!desc->simple_p || !desc->const_iter || desc->assumptions)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
@@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
}
/* Check whether the loop rolls enough to consider. */
- if (loop->desc.niter < 2 * nunroll)
+ if (desc->niter < 2 * nunroll)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
best_copies = 2 * nunroll + 10;
i = 2 * nunroll + 2;
- if ((unsigned) i - 1 >= loop->desc.niter)
- i = loop->desc.niter - 2;
+ if (i - 1 >= desc->niter)
+ i = desc->niter - 2;
for (; i >= nunroll - 1; i--)
{
- unsigned exit_mod = loop->desc.niter % (i + 1);
+ unsigned exit_mod = desc->niter % (i + 1);
- if (loop->desc.postincr)
+ if (!loop_exit_at_end_p (loop))
n_copies = exit_mod + i + 1;
- else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
+ else if (exit_mod != (unsigned) i
+ || desc->noloop_assumptions != NULL_RTX)
n_copies = exit_mod + i + 2;
else
n_copies = i + 1;
@@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
loop->lpt_decision.times = best_unroll;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ ";; Decided to unroll the constant times rolling loop, %d times.\n",
+ loop->lpt_decision.times);
}
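As a worked example of the cost scan above: with desc->niter == 10 and the exit not at the end of the body, the candidate factor i + 1 == 4 gives exit_mod == 10 % 4 == 2, hence n_copies == exit_mod + i + 1 == 6 (two peeled copies plus four in the unrolled body). The factor with the fewest total copies wins, and since the scan runs downward a tie goes to the larger factor.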
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
@@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
unsigned n_remove_edges, i;
edge *remove_edges;
unsigned max_unroll = loop->lpt_decision.times;
- struct loop_desc *desc = &loop->desc;
+ struct niter_desc *desc = get_simple_loop_desc (loop);
+ bool exit_at_end = loop_exit_at_end_p (loop);
niter = desc->niter;
- if (niter <= (unsigned) max_unroll + 1)
+ if (niter <= max_unroll + 1)
abort (); /* Should not get here (such loop should be peeled instead). */
exit_mod = niter % (max_unroll + 1);
@@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
n_remove_edges = 0;
- if (desc->postincr)
+ if (!exit_at_end)
{
- /* Counter is incremented after the exit test; leave exit test
+ /* The exit is not at the end of the loop; leave exit test
in the first copy, so that the loops that start with test
of exit condition have continuous body after unrolling. */
@@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
/* Peel exit_mod iterations. */
RESET_BIT (wont_exit, 0);
- if (desc->may_be_zero)
+ if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
- if (exit_mod
- && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
- loops, exit_mod,
- wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
- DLTHE_FLAG_UPDATE_FREQ))
- abort ();
+ if (exit_mod)
+ {
+ if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+ loops, exit_mod,
+ wont_exit, desc->out_edge,
+ remove_edges, &n_remove_edges,
+ DLTHE_FLAG_UPDATE_FREQ))
+ abort ();
+
+ desc->noloop_assumptions = NULL_RTX;
+ desc->niter -= exit_mod;
+ desc->niter_max -= exit_mod;
+ }
SET_BIT (wont_exit, 1);
}
@@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
/* We know that niter >= max_unroll + 2; so we do not need to care of
case when we would exit before reaching the loop. So just peel
- exit_mod + 1 iterations.
- */
- if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
+ exit_mod + 1 iterations. */
+ if (exit_mod != max_unroll
+ || desc->noloop_assumptions)
{
RESET_BIT (wont_exit, 0);
- if (desc->may_be_zero)
+ if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
DLTHE_FLAG_UPDATE_FREQ))
abort ();
+ desc->niter -= exit_mod + 1;
+ desc->niter_max -= exit_mod + 1;
+ desc->noloop_assumptions = NULL_RTX;
+
SET_BIT (wont_exit, 0);
SET_BIT (wont_exit, 1);
}
@@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
free (wont_exit);
+ if (exit_at_end)
+ {
+ basic_block exit_block = desc->in_edge->src->rbi->copy;
+ /* Find the new in and out edges; they are in the last copy we have made. */
+
+ if (exit_block->succ->dest == desc->out_edge->dest)
+ {
+ desc->out_edge = exit_block->succ;
+ desc->in_edge = exit_block->succ->succ_next;
+ }
+ else
+ {
+ desc->out_edge = exit_block->succ->succ_next;
+ desc->in_edge = exit_block->succ;
+ }
+ }
+
+ desc->niter /= max_unroll + 1;
+ desc->niter_max /= max_unroll + 1;
+ desc->niter_expr = GEN_INT (desc->niter);
+
/* Remove the edges. */
for (i = 0; i < n_remove_edges; i++)
remove_path (loops, remove_edges[i]);
@@ -647,6 +713,7 @@ static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
unsigned nunroll, nunroll_by_av, i;
+ struct niter_desc *desc;
if (!(flags & UAP_UNROLL))
{
@@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
}
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
+ fprintf (rtl_dump_file,
+ "\n;; Considering unrolling loop with runtime computable number of iterations\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
@@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
}
/* Check for simple loops. */
- if (!loop->has_desc)
- {
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
- }
+ desc = get_simple_loop_desc (loop);
/* Check simpleness. */
- if (!loop->simple)
+ if (!desc->simple_p || desc->assumptions)
{
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
+ fprintf (rtl_dump_file,
+ ";; Unable to prove that the number of iterations can be counted in runtime\n");
return;
}
- if (loop->desc.const_iter)
+ if (desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
/* Success; now force nunroll to be power of 2, as we are unable to
cope with overflows in computation of number of iterations. */
- for (i = 1; 2 * i <= nunroll; i *= 2);
+ for (i = 1; 2 * i <= nunroll; i *= 2)
+ continue;
loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
loop->lpt_decision.times = i - 1;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ ";; Decided to unroll the runtime computable times rolling loop, %d times.\n",
+ loop->lpt_decision.times);
}
/* Unroll LOOP for that we are able to count number of iterations in runtime
@@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
static void
unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
{
- rtx niter, init_code, branch_code, jump, label;
+ rtx old_niter, niter, init_code, branch_code, tmp;
unsigned i, j, p;
basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
unsigned n_dom_bbs;
@@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
edge *remove_edges, e;
bool extra_zero_check, last_may_exit;
unsigned max_unroll = loop->lpt_decision.times;
- struct loop_desc *desc = &loop->desc;
+ struct niter_desc *desc = get_simple_loop_desc (loop);
+ bool exit_at_end = loop_exit_at_end_p (loop);
/* Remember blocks whose dominators will have to be updated. */
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
@@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
}
free (body);
- if (desc->postincr)
+ if (!exit_at_end)
{
/* Leave exit in first copy (for explanation why see comment in
unroll_loop_constant_iterations). */
@@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
/* Get expression for number of iterations. */
start_sequence ();
- niter = count_loop_iterations (desc, NULL, NULL);
- if (!niter)
- abort ();
- niter = force_operand (niter, NULL);
+ old_niter = niter = gen_reg_rtx (desc->mode);
+ tmp = force_operand (copy_rtx (desc->niter_expr), niter);
+ if (tmp != niter)
+ emit_move_insn (niter, tmp);
/* Count modulo by ANDing it with max_unroll; we use the fact that
the number of unrollings is a power of two, and thus this is correct
even if there is overflow in the computation. */
- niter = expand_simple_binop (GET_MODE (desc->var), AND,
+ niter = expand_simple_binop (desc->mode, AND,
niter,
GEN_INT (max_unroll),
NULL_RTX, 0, OPTAB_LIB_WIDEN);
@@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
/* Peel the first copy of loop body (almost always we must leave exit test
here; the only exception is when we have extra zero check and the number
- of iterations is reliable (i.e. comes out of NE condition). Also record
- the place of (possible) extra zero check. */
+ of iterations is reliable. Also record the place of (possible) extra
+ zero check. */
sbitmap_zero (wont_exit);
- if (extra_zero_check && desc->cond == NE)
+ if (extra_zero_check
+ && !desc->noloop_assumptions)
SET_BIT (wont_exit, 1);
ezc_swtch = loop_preheader_edge (loop)->src;
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
@@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
p = REG_BR_PROB_BASE / (i + 2);
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
- label = block_label (preheader);
- start_sequence ();
- do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
- GET_MODE (desc->var), NULL_RTX, NULL_RTX,
- label);
- jump = get_last_insn ();
- JUMP_LABEL (jump) = label;
- REG_NOTES (jump)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (p), REG_NOTES (jump));
-
- LABEL_NUSES (label)++;
- branch_code = get_insns ();
- end_sequence ();
+ branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
+ block_label (preheader), p, NULL_RTX);
swtch = loop_split_edge_with (swtch->pred, branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
p = REG_BR_PROB_BASE / (max_unroll + 1);
swtch = ezc_swtch;
preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
- label = block_label (preheader);
- start_sequence ();
- do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
- GET_MODE (desc->var), NULL_RTX, NULL_RTX,
- label);
- jump = get_last_insn ();
- JUMP_LABEL (jump) = label;
- REG_NOTES (jump)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (p), REG_NOTES (jump));
-
- LABEL_NUSES (label)++;
- branch_code = get_insns ();
- end_sequence ();
+ branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
+ block_label (preheader), p, NULL_RTX);
swtch = loop_split_edge_with (swtch->succ, branch_code);
set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
@@ -925,11 +974,45 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
free (wont_exit);
+ if (exit_at_end)
+ {
+ basic_block exit_block = desc->in_edge->src->rbi->copy;
+ /* Find the new in and out edges; they are in the last copy we have made. */
+
+ if (exit_block->succ->dest == desc->out_edge->dest)
+ {
+ desc->out_edge = exit_block->succ;
+ desc->in_edge = exit_block->succ->succ_next;
+ }
+ else
+ {
+ desc->out_edge = exit_block->succ->succ_next;
+ desc->in_edge = exit_block->succ;
+ }
+ }
+
/* Remove the edges. */
for (i = 0; i < n_remove_edges; i++)
remove_path (loops, remove_edges[i]);
free (remove_edges);
+ /* We must be careful when updating the number of iterations due to
+ preconditioning and the fact that the value must be valid on entry
+ to the loop. After passing through the above code, we see that
+ the correct new number of iterations is this: */
+ if (desc->const_iter)
+ abort ();
+ desc->niter_expr =
+ simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1));
+ desc->niter_max /= max_unroll + 1;
+ if (exit_at_end)
+ {
+ desc->niter_expr =
+ simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
+ desc->noloop_assumptions = NULL_RTX;
+ desc->niter_max--;
+ }
+
if (rtl_dump_file)
fprintf (rtl_dump_file,
";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
@@ -941,6 +1024,7 @@ static void
decide_peel_simple (struct loop *loop, int flags)
{
unsigned npeel;
+ struct niter_desc *desc;
if (!(flags & UAP_PEEL))
{
@@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags)
}
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
+ fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n");
/* npeel = number of iterations to peel. */
npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
@@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags)
}
/* Check for simple loops. */
- if (!loop->has_desc)
- {
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
- }
+ desc = get_simple_loop_desc (loop);
/* Check number of iterations. */
- if (loop->simple && loop->desc.const_iter)
+ if (desc->simple_p && !desc->assumptions && desc->const_iter)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
@@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags)
/* Do not simply peel loops with branches inside -- it increases number
of mispredicts. */
- if (loop->desc.n_branches > 1)
+ if (num_loop_branches (loop) > 1)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
@@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags)
/* Success. */
loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
loop->lpt_decision.times = npeel;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n",
+ loop->lpt_decision.times);
}
/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
@@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
{
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
+ struct niter_desc *desc = get_simple_loop_desc (loop);
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_zero (wont_exit);
@@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
free (wont_exit);
+ if (desc->simple_p)
+ {
+ if (desc->const_iter)
+ {
+ desc->niter -= npeel;
+ desc->niter_expr = GEN_INT (desc->niter);
+ desc->noloop_assumptions = NULL_RTX;
+ }
+ else
+ {
+ /* We cannot just update niter_expr, as its value might be clobbered
+ inside the loop. We could handle this by counting the number into a
+ temporary just as we do in runtime unrolling, but it does not
+ seem worthwhile. */
+ free_simple_loop_desc (loop);
+ }
+ }
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
}
@@ -1057,6 +1159,7 @@ static void
decide_unroll_stupid (struct loop *loop, int flags)
{
unsigned nunroll, nunroll_by_av, i;
+ struct niter_desc *desc;
if (!(flags & UAP_UNROLL_ALL))
{
@@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
if (rtl_dump_file)
- fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
+ fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
@@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
/* Check for simple loops. */
- if (!loop->has_desc)
- {
- loop->simple = simple_loop_p (loop, &loop->desc);
- loop->has_desc = 1;
- }
+ desc = get_simple_loop_desc (loop);
/* Check simpleness. */
- if (loop->simple)
+ if (desc->simple_p && !desc->assumptions)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; The loop is simple\n");
@@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags)
/* Do not unroll loops with branches inside -- it increases number
of mispredicts. */
- if (loop->desc.n_branches > 1)
+ if (num_loop_branches (loop) > 1)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
@@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags)
}
/* If we have profile feedback, check whether the loop rolls. */
- if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
+ if (loop->header->count
+ && expected_loop_iterations (loop) < 2 * nunroll)
{
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
@@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags)
/* Success. Now force nunroll to be power of 2, as it seems that this
improves results (partially because of better alignments, partially
because of some dark magic). */
- for (i = 1; 2 * i <= nunroll; i *= 2);
+ for (i = 1; 2 * i <= nunroll; i *= 2)
+ continue;
loop->lpt_decision.decision = LPT_UNROLL_STUPID;
loop->lpt_decision.times = i - 1;
+
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file,
+ ";; Decided to unroll the loop stupidly, %d times.\n",
+ loop->lpt_decision.times);
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
@@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
{
sbitmap wont_exit;
unsigned nunroll = loop->lpt_decision.times;
+ struct niter_desc *desc = get_simple_loop_desc (loop);
wont_exit = sbitmap_alloc (nunroll + 1);
sbitmap_zero (wont_exit);
@@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
free (wont_exit);
+ if (desc->simple_p)
+ {
+ /* We may indeed get here, provided that there are nontrivial assumptions
+ for a loop to be really simple. We could update the counts, but the
+ problem is that we are unable to decide which exit will be taken
+ (not really true when the number of iterations is constant,
+ but no one will do anything with this information, so we do not
+ worry about it). */
+ desc->simple_p = false;
+ }
+
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
nunroll, num_loop_insns (loop));
diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c
index 40f0e83..ebbabe8 100644
--- a/gcc/loop-unswitch.c
+++ b/gcc/loop-unswitch.c
@@ -79,11 +79,63 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
with handling this case. */
static struct loop *unswitch_loop (struct loops *, struct loop *,
- basic_block);
+ basic_block, rtx, rtx);
static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
-static bool may_unswitch_on_p (basic_block, struct loop *,
- basic_block *);
-static rtx reversed_condition (rtx);
+static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
+
+/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
+ true, with probability PROB. If CINSN is not NULL, it is the insn to copy
+ in order to create a jump. */
+
+rtx
+compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
+ rtx cinsn)
+{
+ rtx seq, jump, cond;
+ enum machine_mode mode;
+
+ mode = GET_MODE (op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op1);
+
+ start_sequence ();
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ {
+ /* A hack -- there seems to be no easy generic way to make a
+ conditional jump from a CCmode comparison. */
+ if (!cinsn)
+ abort ();
+ cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
+ if (GET_CODE (cond) != comp
+ || !rtx_equal_p (op0, XEXP (cond, 0))
+ || !rtx_equal_p (op1, XEXP (cond, 1)))
+ abort ();
+ emit_jump_insn (copy_insn (PATTERN (cinsn)));
+ jump = get_last_insn ();
+ JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
+ LABEL_NUSES (JUMP_LABEL (jump))++;
+ redirect_jump (jump, label, 0);
+ }
+ else
+ {
+ if (cinsn)
+ abort ();
+
+ op0 = force_operand (op0, NULL_RTX);
+ op1 = force_operand (op1, NULL_RTX);
+ do_compare_rtx_and_jump (op0, op1, comp, 0,
+ mode, NULL_RTX, NULL_RTX, label);
+ jump = get_last_insn ();
+ JUMP_LABEL (jump) = label;
+ LABEL_NUSES (label)++;
+ }
+ REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
+ REG_NOTES (jump));
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
+}
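The runtime-unrolling code above uses this helper to emit the preconditioning switch; both of its call sites follow the same pattern (unswitch_loop below instead passes a CINSN when the comparison is in CC mode):

branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
                                    block_label (preheader), p, NULL_RTX);
swtch = loop_split_edge_with (swtch->pred, branch_code);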
/* Main entry point. Perform loop unswitching on all suitable LOOPS. */
void
@@ -111,48 +163,82 @@ unswitch_loops (struct loops *loops)
verify_loop_structure (loops);
#endif
}
+
+ iv_analysis_done ();
}
/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
- basic blocks (for what it means see comments below). List of basic blocks
- inside LOOP is provided in BODY to save time. */
-static bool
-may_unswitch_on_p (basic_block bb, struct loop *loop, basic_block *body)
+ basic blocks (for what it means see comments below). In case the condition
+ compares a loop-invariant CC mode register, return the jump in CINSN. */
+
+static rtx
+may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
{
- rtx test;
+ rtx test, at, insn, op[2];
+ struct rtx_iv iv;
unsigned i;
+ enum machine_mode mode;
/* BB must end in a simple conditional jump. */
if (!bb->succ || !bb->succ->succ_next || bb->succ->succ_next->succ_next)
- return false;
+ return NULL_RTX;
if (!any_condjump_p (BB_END (bb)))
- return false;
+ return NULL_RTX;
/* With branches inside loop. */
if (!flow_bb_inside_loop_p (loop, bb->succ->dest)
|| !flow_bb_inside_loop_p (loop, bb->succ->succ_next->dest))
- return false;
+ return NULL_RTX;
/* It must be executed just once each iteration (because otherwise we
are unable to update dominator/irreducible loop information correctly). */
if (!just_once_each_iteration_p (loop, bb))
- return false;
+ return NULL_RTX;
- /* Condition must be invariant. We use just a stupid test of invariantness
- of the condition: all used regs must not be modified inside loop body. */
- test = get_condition (BB_END (bb), NULL, true);
+ /* Condition must be invariant. */
+ test = get_condition (BB_END (bb), &at, true);
if (!test)
- return false;
+ return NULL_RTX;
+
+ for (i = 0; i < 2; i++)
+ {
+ op[i] = XEXP (test, i);
+
+ if (CONSTANT_P (op[i]))
+ continue;
+
+ insn = iv_get_reaching_def (at, op[i]);
+ if (!iv_analyse (insn, op[i], &iv))
+ return NULL_RTX;
+ if (iv.step != const0_rtx
+ || iv.first_special)
+ return NULL_RTX;
+
+ op[i] = get_iv_value (&iv, const0_rtx);
+ }
+
+ mode = GET_MODE (op[0]);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op[1]);
+ if (GET_MODE_CLASS (mode) == MODE_CC)
+ {
+ if (at != BB_END (bb))
+ return NULL_RTX;
- for (i = 0; i < loop->num_nodes; i++)
- if (modified_between_p (test, BB_HEAD (body[i]), NEXT_INSN (BB_END (body[i]))))
- return false;
+ *cinsn = BB_END (bb);
+ if (!rtx_equal_p (op[0], XEXP (test, 0))
+ || !rtx_equal_p (op[1], XEXP (test, 1)))
+ return NULL_RTX;
- return true;
+ return test;
+ }
+
+ return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
+ op[0], op[1]));
}
/* Reverses CONDition; returns NULL if we cannot. */
-static rtx
+rtx
reversed_condition (rtx cond)
{
enum rtx_code reversed;
@@ -173,13 +259,10 @@ static void
unswitch_single_loop (struct loops *loops, struct loop *loop,
rtx cond_checked, int num)
{
- basic_block *bbs, bb;
+ basic_block *bbs;
struct loop *nloop;
unsigned i;
- int true_first;
- rtx cond, rcond, conds, rconds, acond, split_before;
- int always_true;
- int always_false;
+ rtx cond, rcond, conds, rconds, acond, cinsn = NULL_RTX;
int repeat;
edge e;
@@ -237,8 +320,9 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
/* Find a bb to unswitch on. */
bbs = get_loop_body (loop);
+ iv_analysis_loop_init (loop);
for (i = 0; i < loop->num_nodes; i++)
- if (may_unswitch_on_p (bbs[i], loop, bbs))
+ if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
break;
if (i == loop->num_nodes)
@@ -247,39 +331,26 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
return;
}
- if (!(cond = get_condition (BB_END (bbs[i]), &split_before, true)))
- abort ();
rcond = reversed_condition (cond);
+ if (rcond)
+ rcond = canon_condition (rcond);
/* Check whether the result can be predicted. */
- always_true = 0;
- always_false = 0;
for (acond = cond_checked; acond; acond = XEXP (acond, 1))
- {
- if (rtx_equal_p (cond, XEXP (acond, 0)))
- {
- always_true = 1;
- break;
- }
- if (rtx_equal_p (rcond, XEXP (acond, 0)))
- {
- always_false = 1;
- break;
- }
- }
+ simplify_using_condition (XEXP (acond, 0), &cond, NULL);
- if (always_true)
+ if (cond == const_true_rtx)
{
/* Remove false path. */
- for (e = bbs[i]->succ; !(e->flags & EDGE_FALLTHRU); e = e->succ_next);
+ e = FALLTHRU_EDGE (bbs[i]);
remove_path (loops, e);
free (bbs);
repeat = 1;
}
- else if (always_false)
+ else if (cond == const0_rtx)
{
/* Remove true path. */
- for (e = bbs[i]->succ; e->flags & EDGE_FALLTHRU; e = e->succ_next);
+ e = BRANCH_EDGE (bbs[i]);
remove_path (loops, e);
free (bbs);
repeat = 1;
@@ -293,21 +364,17 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
else
rconds = cond_checked;
- /* Separate condition in a single basic block. */
- bb = split_loop_bb (bbs[i], PREV_INSN (split_before))->dest;
- free (bbs);
- true_first = !(bb->succ->flags & EDGE_FALLTHRU);
if (rtl_dump_file)
fprintf (rtl_dump_file, ";; Unswitching loop\n");
/* Unswitch the loop on this condition. */
- nloop = unswitch_loop (loops, loop, bb);
+ nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
if (!nloop)
abort ();
/* Invoke itself on modified loops. */
- unswitch_single_loop (loops, nloop, true_first ? conds : rconds, num + 1);
- unswitch_single_loop (loops, loop, true_first ? rconds : conds, num + 1);
+ unswitch_single_loop (loops, nloop, rconds, num + 1);
+ unswitch_single_loop (loops, loop, conds, num + 1);
free_EXPR_LIST_node (conds);
if (rcond)
@@ -316,17 +383,21 @@ unswitch_single_loop (struct loops *loops, struct loop *loop,
/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support
unswitching of innermost loops. UNSWITCH_ON must be executed in every
- iteration, i.e. it must dominate LOOP latch, and should only contain code
- for the condition we unswitch on. Returns NULL if impossible, new
- loop otherwise. */
+ iteration, i.e. it must dominate LOOP latch. COND is the condition
+ determining which loop is entered. Returns NULL if impossible, new loop
+ otherwise. The new loop is entered if COND is true. If CINSN is not
+ NULL, it is the insn in which COND is compared. */
+
static struct loop *
-unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
+unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
+ rtx cond, rtx cinsn)
{
- edge entry, latch_edge;
+ edge entry, latch_edge, true_edge, false_edge, e;
basic_block switch_bb, unswitch_on_alt, src;
struct loop *nloop;
sbitmap zero_bitmap;
- int irred_flag;
+ int irred_flag, prob;
+ rtx seq;
/* Some sanity checking. */
if (!flow_bb_inside_loop_p (loop, unswitch_on))
@@ -343,12 +414,6 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
if (!flow_bb_inside_loop_p (loop, unswitch_on->succ->succ_next->dest))
abort ();
- /* Will we be able to perform redirection? */
- if (!any_condjump_p (BB_END (unswitch_on)))
- return NULL;
- if (!cfg_layout_can_duplicate_bb_p (unswitch_on))
- return NULL;
-
entry = loop_preheader_edge (loop);
/* Make a copy. */
@@ -365,10 +430,24 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
/* Record the block with condition we unswitch on. */
unswitch_on_alt = unswitch_on->rbi->copy;
+ true_edge = BRANCH_EDGE (unswitch_on_alt);
+ false_edge = FALLTHRU_EDGE (unswitch_on);
+ latch_edge = loop->latch->rbi->copy->succ;
+
+ /* Create a block with the condition. */
+ prob = true_edge->probability;
+ switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
+ seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
+ block_label (true_edge->dest),
+ prob, cinsn);
+ emit_insn_after (seq, BB_END (switch_bb));
+ e = make_edge (switch_bb, true_edge->dest, 0);
+ e->probability = prob;
+ e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
+ e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
+ e->probability = false_edge->probability;
+ e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
- /* Make a copy of the block containing the condition; we will use
- it as switch to decide which loop we want to use. */
- switch_bb = cfg_layout_duplicate_bb (unswitch_on, NULL);
if (irred_flag)
{
switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
@@ -381,19 +460,14 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on)
switch_bb->succ->flags &= ~EDGE_IRREDUCIBLE_LOOP;
switch_bb->succ->succ_next->flags &= ~EDGE_IRREDUCIBLE_LOOP;
}
- unswitch_on->rbi->copy = unswitch_on_alt;
/* Loopify from the copy of LOOP body, constructing the new loop. */
- for (latch_edge = loop->latch->rbi->copy->succ;
- latch_edge->dest != loop->header;
- latch_edge = latch_edge->succ_next);
nloop = loopify (loops, latch_edge,
loop->header->rbi->copy->pred, switch_bb);
- /* Remove branches that are now unreachable in new loops. We rely on the
- fact that cfg_layout_duplicate_bb reverses list of edges. */
- remove_path (loops, unswitch_on->succ);
- remove_path (loops, unswitch_on_alt->succ);
+ /* Remove branches that are now unreachable in new loops. */
+ remove_path (loops, true_edge);
+ remove_path (loops, false_edge);
/* One of created loops do not have to be subloop of the outer loop now,
so fix its placement in loop data structure. */
diff --git a/gcc/predict.c b/gcc/predict.c
index 50580bd..44ee10c 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -406,13 +406,16 @@ estimate_probability (struct loops *loops_info)
unsigned j;
int exits;
struct loop *loop = loops_info->parray[i];
- struct loop_desc desc;
+ struct niter_desc desc;
unsigned HOST_WIDE_INT niter;
flow_loop_scan (loop, LOOP_EXIT_EDGES);
exits = loop->num_exits;
- if (simple_loop_p (loop, &desc) && desc.const_iter)
+ iv_analysis_loop_init (loop);
+ find_simple_exit (loop, &desc);
+
+ if (desc.simple_p && desc.const_iter)
{
int prob;
niter = desc.niter + 1;
@@ -472,6 +475,8 @@ estimate_probability (struct loops *loops_info)
free (bbs);
}
+ iv_analysis_done ();
+
/* Attempt to predict conditional jumps using a number of heuristics. */
FOR_EACH_BB (bb)
{
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 0302466..c2c2a4b 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2361,4 +2361,15 @@ extern void tracer (void);
/* In var-tracking.c */
extern void variable_tracking_main (void);
+/* In stor-layout.c. */
+extern void get_mode_bounds (enum machine_mode, int, rtx *, rtx *);
+
+/* In loop-unswitch.c */
+extern rtx reversed_condition (rtx);
+extern rtx compare_and_jump_seq (rtx, rtx, enum rtx_code, rtx, int, rtx);
+
+/* In loop-iv.c */
+extern rtx canon_condition (rtx);
+extern void simplify_using_condition (rtx, rtx *, struct bitmap_head_def *);
+
#endif /* ! GCC_RTL_H */
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index 1272a0c..98420fb 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -2118,4 +2118,27 @@ get_best_mode (int bitsize, int bitpos, unsigned int align,
return mode;
}
+/* Gets minimal and maximal values for MODE (signed or unsigned depending on
+ SIGN). */
+
+void
+get_mode_bounds (enum machine_mode mode, int sign, rtx *mmin, rtx *mmax)
+{
+ int size = GET_MODE_BITSIZE (mode);
+
+ if (size > HOST_BITS_PER_WIDE_INT)
+ abort ();
+
+ if (sign)
+ {
+ *mmin = GEN_INT (-((unsigned HOST_WIDE_INT) 1 << (size - 1)));
+ *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1)) - 1);
+ }
+ else
+ {
+ *mmin = const0_rtx;
+ *mmax = GEN_INT (((unsigned HOST_WIDE_INT) 1 << (size - 1) << 1) - 1);
+ }
+}
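The unsigned maximum above is formed with two shifts because a single 1 << size would be undefined when size equals HOST_BITS_PER_WIDE_INT. A standalone check of the bound arithmetic in plain C (illustrative only, not GCC code):

#include <stdio.h>

int
main (void)
{
  int size = 8; /* e.g. QImode */
  long smin = -(1L << (size - 1));                     /* -128 */
  long smax = (1L << (size - 1)) - 1;                  /*  127 */
  unsigned long umax = ((1UL << (size - 1)) << 1) - 1; /*  255 */

  printf ("%ld %ld %lu\n", smin, smax, umax);
  return 0;
}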
+
#include "gt-stor-layout.h"
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 763c5d0..c470cf0 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -3034,11 +3034,16 @@ static void
rest_of_handle_loop2 (tree decl, rtx insns)
{
struct loops *loops;
+ basic_block bb;
+
timevar_push (TV_LOOP);
open_dump_file (DFI_loop2, decl);
if (rtl_dump_file)
dump_flow_info (rtl_dump_file);
+ /* Initialize structures for layout changes. */
+ cfg_layout_initialize ();
+
loops = loop_optimizer_init (rtl_dump_file);
if (loops)
@@ -3056,6 +3061,12 @@ rest_of_handle_loop2 (tree decl, rtx insns)
loop_optimizer_finalize (loops, rtl_dump_file);
}
+ /* Finalize layout changes. */
+ FOR_EACH_BB (bb)
+ if (bb->next_bb != EXIT_BLOCK_PTR)
+ bb->rbi->next = bb->next_bb;
+ cfg_layout_finalize ();
+
cleanup_cfg (CLEANUP_EXPENSIVE);
delete_trivially_dead_insns (insns, max_reg_num ());
reg_scan (insns, max_reg_num (), 0);