aboutsummaryrefslogtreecommitdiff
path: root/gcc/loop-unroll.c
diff options
context:
space:
mode:
authorRevital Eres <eres@il.ibm.com>2004-10-18 05:31:28 +0000
committerMostafa Hagog <hagog@gcc.gnu.org>2004-10-18 05:31:28 +0000
commitf37a4f143d14f12cc7bb9a76e10e7a119bc3d61f (patch)
tree659451af0eb92e8412fce0e70aaf671611d128e4 /gcc/loop-unroll.c
parent535306d0decc4821b3c4b896d5e7b837428c5160 (diff)
downloadgcc-f37a4f143d14f12cc7bb9a76e10e7a119bc3d61f.zip
gcc-f37a4f143d14f12cc7bb9a76e10e7a119bc3d61f.tar.gz
gcc-f37a4f143d14f12cc7bb9a76e10e7a119bc3d61f.tar.bz2
Makefile.in (loop-unroll.o): Add VARRAY_H dependency.
2004-10-18 Revital Eres <eres@il.ibm.com> * Makefile.in (loop-unroll.o): Add VARRAY_H dependency. * loop-unroll.c: Include varray.h. (struct var_to_expand, struct opt_info): Rename split_ivs_info to opt_info and expand it to support variable expansion. (analyze_insns_in_loop): Rename analyze_ivs_to_split and expand it to support variable expansion. (pt_info_start_duplication): Rename si_info_start_duplication. (apply_opt_in_copies): Rename split_ivs_in_copies and add support to the variable expansion optimization. (free_opt_info): Rename free_si_info. (analyze_insn_to_expand_var, referenced_in_one_insn_in_loop_p, expand_var_during_unrolling, insert_var_expansion_initialization, combine_var_copies_in_loop_exit, release_var_copies, get_expansion): New functions. (peel_loop_completely, unroll_loop_constant_iterations, unroll_loop_runtime_iterations, peel_loop_simple, unroll_loop_stupid): Change uses of struct si_info to struct opt_info and add uses of fvariable-expansion-in-unroller flag. * params.def: Add parameter to restrict the number of expansions. * params.h: (MAX_VARIABLE_EXPANSIONS): New define to restrict the number of expansions. * common.opt: (fvariable-expansion-in-unroller): New flag. * doc/invoke.texi: (fvariable-expansion-in-unroller): Document. From-SVN: r89197
Diffstat (limited to 'gcc/loop-unroll.c')
-rw-r--r--gcc/loop-unroll.c762
1 files changed, 587 insertions, 175 deletions
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 6ea0987..1d1eb4d 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -31,7 +31,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "output.h"
#include "expr.h"
#include "hashtab.h"
-#include "recog.h"
+#include "recog.h"
+#include "varray.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
@@ -83,11 +84,34 @@ struct iv_to_split
XEXP (XEXP (single_set, loc[0]), loc[1]). */
};
-struct split_ivs_info
+/* Information about accumulators to expand. */
+
+struct var_to_expand
+{
+ rtx insn; /* The insn in that the variable expansion occurs. */
+ rtx reg; /* The accumulator which is expanded. */
+ varray_type var_expansions; /* The copies of the accumulator which is expanded. */
+ enum rtx_code op; /* The type of the accumulation - addition, subtraction
+ or multiplication. */
+ int expansion_count; /* Count the number of expansions generated so far. */
+ int reuse_expansion; /* The expansion we intend to reuse to expand
+ the accumulator. If REUSE_EXPANSION is 0 reuse
+ the original accumulator. Else use
+ var_expansions[REUSE_EXPANSION - 1]. */
+};
+
+/* Information about optimization applied in
+ the unrolled loop. */
+
+struct opt_info
{
- htab_t insns_to_split; /* A hashtable of insns to split. */
- unsigned first_new_block; /* The first basic block that was
- duplicated. */
+ htab_t insns_to_split; /* A hashtable of insns to split. */
+ htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
+ to expand. */
+ unsigned first_new_block; /* The first basic block that was
+ duplicated. */
+ basic_block loop_exit; /* The loop exit basic block. */
+ basic_block loop_preheader; /* The loop preheader basic block. */
};
static void decide_unrolling_and_peeling (struct loops *, int);
@@ -103,10 +127,18 @@ static void peel_loop_completely (struct loops *, struct loop *);
static void unroll_loop_stupid (struct loops *, struct loop *);
static void unroll_loop_constant_iterations (struct loops *, struct loop *);
static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
-static struct split_ivs_info *analyze_ivs_to_split (struct loop *);
-static void si_info_start_duplication (struct split_ivs_info *);
-static void split_ivs_in_copies (struct split_ivs_info *, unsigned, bool, bool);
-static void free_si_info (struct split_ivs_info *);
+static struct opt_info *analyze_insns_in_loop (struct loop *);
+static void opt_info_start_duplication (struct opt_info *);
+static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
+static void free_opt_info (struct opt_info *);
+static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
+static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
+static struct iv_to_split *analyze_iv_to_split_insn (rtx);
+static void expand_var_during_unrolling (struct var_to_expand *, rtx);
+static int insert_var_expansion_initialization (void **, void *);
+static int combine_var_copies_in_loop_exit (void **, void *);
+static int release_var_copies (void **, void *);
+static rtx get_expansion (struct var_to_expand *);
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
@@ -456,8 +488,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
unsigned n_remove_edges, i;
edge *remove_edges, ein;
struct niter_desc *desc = get_simple_loop_desc (loop);
- struct split_ivs_info *si_info = NULL;
-
+ struct opt_info *opt_info = NULL;
+
npeel = desc->niter;
if (npeel)
@@ -472,9 +504,9 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
n_remove_edges = 0;
if (flag_split_ivs_in_unroller)
- si_info = analyze_ivs_to_split (loop);
-
- si_info_start_duplication (si_info);
+ opt_info = analyze_insns_in_loop (loop);
+
+ opt_info_start_duplication (opt_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, npeel,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
@@ -482,12 +514,12 @@ peel_loop_completely (struct loops *loops, struct loop *loop)
abort ();
free (wont_exit);
-
- if (si_info)
- {
- split_ivs_in_copies (si_info, npeel, false, true);
- free_si_info (si_info);
- }
+
+ if (opt_info)
+ {
+ apply_opt_in_copies (opt_info, npeel, false, true);
+ free_opt_info (opt_info);
+ }
/* Remove the exit edges. */
for (i = 0; i < n_remove_edges; i++)
@@ -636,8 +668,8 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
- struct split_ivs_info *si_info = NULL;
-
+ struct opt_info *opt_info = NULL;
+
niter = desc->niter;
/* Should not get here (such loop should be peeled instead). */
@@ -650,10 +682,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
n_remove_edges = 0;
-
- if (flag_split_ivs_in_unroller)
- si_info = analyze_ivs_to_split (loop);
-
+ if (flag_split_ivs_in_unroller
+ || flag_variable_expansion_in_unroller)
+ opt_info = analyze_insns_in_loop (loop);
+
if (!exit_at_end)
{
/* The exit is not at the end of the loop; leave exit test
@@ -670,17 +702,17 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
if (exit_mod)
{
- si_info_start_duplication (si_info);
- if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+ opt_info_start_duplication (opt_info);
+ if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod,
wont_exit, desc->out_edge,
remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
- if (si_info && exit_mod > 1)
- split_ivs_in_copies (si_info, exit_mod, false, false);
-
+ if (opt_info && exit_mod > 1)
+ apply_opt_in_copies (opt_info, exit_mod, false, false);
+
desc->noloop_assumptions = NULL_RTX;
desc->niter -= exit_mod;
desc->niter_max -= exit_mod;
@@ -705,16 +737,16 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
RESET_BIT (wont_exit, 0);
if (desc->noloop_assumptions)
RESET_BIT (wont_exit, 1);
-
- si_info_start_duplication (si_info);
+
+ opt_info_start_duplication (opt_info);
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, exit_mod + 1,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
-
- if (si_info && exit_mod > 0)
- split_ivs_in_copies (si_info, exit_mod + 1, false, false);
+
+ if (opt_info && exit_mod > 0)
+ apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
desc->niter -= exit_mod + 1;
desc->niter_max -= exit_mod + 1;
@@ -728,17 +760,18 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
}
/* Now unroll the loop. */
- si_info_start_duplication (si_info);
+
+ opt_info_start_duplication (opt_info);
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, max_unroll,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
- if (si_info)
+ if (opt_info)
{
- split_ivs_in_copies (si_info, max_unroll, true, true);
- free_si_info (si_info);
+ apply_opt_in_copies (opt_info, max_unroll, true, true);
+ free_opt_info (opt_info);
}
free (wont_exit);
@@ -900,11 +933,12 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
unsigned max_unroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
bool exit_at_end = loop_exit_at_end_p (loop);
- struct split_ivs_info *si_info = NULL;
-
- if (flag_split_ivs_in_unroller)
- si_info = analyze_ivs_to_split (loop);
-
+ struct opt_info *opt_info = NULL;
+
+ if (flag_split_ivs_in_unroller
+ || flag_variable_expansion_in_unroller)
+ opt_info = analyze_insns_in_loop (loop);
+
/* Remember blocks whose dominators will have to be updated. */
dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
n_dom_bbs = 0;
@@ -1040,18 +1074,18 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
sbitmap_ones (wont_exit);
RESET_BIT (wont_exit, may_exit_copy);
-
- si_info_start_duplication (si_info);
+ opt_info_start_duplication (opt_info);
+
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, max_unroll,
wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
-
- if (si_info)
+
+ if (opt_info)
{
- split_ivs_in_copies (si_info, max_unroll, true, true);
- free_si_info (si_info);
+ apply_opt_in_copies (opt_info, max_unroll, true, true);
+ free_opt_info (opt_info);
}
free (wont_exit);
@@ -1206,26 +1240,27 @@ peel_loop_simple (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned npeel = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
- struct split_ivs_info *si_info = NULL;
-
+ struct opt_info *opt_info = NULL;
+
if (flag_split_ivs_in_unroller && npeel > 1)
- si_info = analyze_ivs_to_split (loop);
-
+ opt_info = analyze_insns_in_loop (loop);
+
wont_exit = sbitmap_alloc (npeel + 1);
sbitmap_zero (wont_exit);
-
- si_info_start_duplication (si_info);
+
+ opt_info_start_duplication (opt_info);
+
if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
loops, npeel, wont_exit, NULL, NULL, NULL,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
free (wont_exit);
-
- if (si_info)
+
+ if (opt_info)
{
- split_ivs_in_copies (si_info, npeel, false, false);
- free_si_info (si_info);
+ apply_opt_in_copies (opt_info, npeel, false, false);
+ free_opt_info (opt_info);
}
if (desc->simple_p)
@@ -1350,24 +1385,26 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop)
sbitmap wont_exit;
unsigned nunroll = loop->lpt_decision.times;
struct niter_desc *desc = get_simple_loop_desc (loop);
- struct split_ivs_info *si_info = NULL;
-
- if (flag_split_ivs_in_unroller)
- si_info = analyze_ivs_to_split (loop);
-
+ struct opt_info *opt_info = NULL;
+
+ if (flag_split_ivs_in_unroller
+ || flag_variable_expansion_in_unroller)
+ opt_info = analyze_insns_in_loop (loop);
+
+
wont_exit = sbitmap_alloc (nunroll + 1);
sbitmap_zero (wont_exit);
-
- si_info_start_duplication (si_info);
+ opt_info_start_duplication (opt_info);
+
if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
loops, nunroll, wont_exit, NULL, NULL, NULL,
DLTHE_FLAG_UPDATE_FREQ))
abort ();
-
- if (si_info)
+
+ if (opt_info)
{
- split_ivs_in_copies (si_info, nunroll, true, true);
- free_si_info (si_info);
+ apply_opt_in_copies (opt_info, nunroll, true, true);
+ free_opt_info (opt_info);
}
free (wont_exit);
@@ -1407,10 +1444,152 @@ si_info_eq (const void *ivts1, const void *ivts2)
return i1->insn == i2->insn;
}
+/* Return a hash for VES, which is really a "var_to_expand *". */
+
+static hashval_t
+ve_info_hash (const void *ves)
+{
+ return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
+}
+
+/* Return true if IVTS1 and IVTS2 (which are really both of type
+ "var_to_expand *") refer to the same instruction. */
+
+static int
+ve_info_eq (const void *ivts1, const void *ivts2)
+{
+ const struct var_to_expand *i1 = ivts1;
+ const struct var_to_expand *i2 = ivts2;
+
+ return i1->insn == i2->insn;
+}
+
+/* Returns true if REG is referenced in one insn in LOOP. */
+
+bool
+referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
+{
+ basic_block *body, bb;
+ unsigned i;
+ int count_ref = 0;
+ rtx insn;
+
+ body = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ bb = body[i];
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (rtx_referenced_p (reg, insn))
+ count_ref++;
+ }
+ }
+ return (count_ref == 1);
+}
+
+/* Determine whether INSN contains an accumulator
+ which can be expanded into separate copies,
+ one for each copy of the LOOP body.
+
+ for (i = 0 ; i < n; i++)
+ sum += a[i];
+
+ ==>
+
+ sum += a[i]
+ ....
+ i = i+1;
+ sum1 += a[i]
+ ....
+ i = i+1
+ sum2 += a[i];
+ ....
+
+ Return NULL if INSN contains no opportunity for expansion of accumulator.
+ Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
+ information and return a pointer to it.
+*/
+
+static struct var_to_expand *
+analyze_insn_to_expand_var (struct loop *loop, rtx insn)
+{
+ rtx set, dest, src, op1;
+ struct var_to_expand *ves;
+ enum machine_mode mode1, mode2;
+
+ set = single_set (insn);
+ if (!set)
+ return NULL;
+
+ dest = SET_DEST (set);
+ src = SET_SRC (set);
+
+ if (GET_CODE (src) != PLUS
+ && GET_CODE (src) != MINUS
+ && GET_CODE (src) != MULT)
+ return NULL;
+
+ if (!XEXP (src, 0))
+ return NULL;
+
+ op1 = XEXP (src, 0);
+
+ if (!REG_P (dest)
+ && !(GET_CODE (dest) == SUBREG
+ && REG_P (SUBREG_REG (dest))))
+ return NULL;
+
+ if (!rtx_equal_p (dest, op1))
+ return NULL;
+
+ if (!referenced_in_one_insn_in_loop_p (loop, dest))
+ return NULL;
+
+ if (rtx_referenced_p (dest, XEXP (src, 1)))
+ return NULL;
+
+ mode1 = GET_MODE (dest);
+ mode2 = GET_MODE (XEXP (src, 1));
+ if ((FLOAT_MODE_P (mode1)
+ || FLOAT_MODE_P (mode2))
+ && !flag_unsafe_math_optimizations)
+ return NULL;
+
+ /* Record the accumulator to expand. */
+ ves = xmalloc (sizeof (struct var_to_expand));
+ ves->insn = insn;
+ VARRAY_RTX_INIT (ves->var_expansions, 1, "var_expansions");
+ ves->reg = copy_rtx (dest);
+ ves->op = GET_CODE (src);
+ ves->expansion_count = 0;
+ ves->reuse_expansion = 0;
+ return ves;
+}
+
/* Determine whether there is an induction variable in INSN that
- we would like to split during unrolling. Return NULL if INSN
- contains no interesting IVs. Otherwise, allocate an IV_TO_SPLIT
- structure, fill it with the relevant information and return a
+ we would like to split during unrolling.
+
+ I.e. replace
+
+ i = i + 1;
+ ...
+ i = i + 1;
+ ...
+ i = i + 1;
+ ...
+
+ type chains by
+
+ i0 = i + 1
+ ...
+ i = i0 + 1
+ ...
+ i = i0 + 2
+ ...
+
+ Return NULL if INSN contains no interesting IVs. Otherwise, allocate
+ an IV_TO_SPLIT structure, fill it with the relevant information and return a
pointer to it. */
static struct iv_to_split *
@@ -1451,27 +1630,56 @@ analyze_iv_to_split_insn (rtx insn)
return ivts;
}
-/* Determines which of induction variables in LOOP to split.
- Return a SPLIT_IVS_INFO struct with the hash table filled
- with all insns to split IVs in. The FIRST_NEW_BLOCK field
+/* Determines which of insns in LOOP can be optimized.
+ Return a OPT_INFO struct with the relevant hash tables filled
+ with all insns to be optimized. The FIRST_NEW_BLOCK field
is undefined for the return value. */
-static struct split_ivs_info *
-analyze_ivs_to_split (struct loop *loop)
+static struct opt_info *
+analyze_insns_in_loop (struct loop *loop)
{
basic_block *body, bb;
- unsigned i;
- struct split_ivs_info *si_info = xcalloc (1, sizeof (struct split_ivs_info));
+ unsigned i, n_edges = 0;
+ struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
rtx insn;
- struct iv_to_split *ivts;
- PTR *slot;
-
- si_info->insns_to_split = htab_create (5 * loop->num_nodes,
- si_info_hash, si_info_eq, free);
-
+ struct iv_to_split *ivts = NULL;
+ struct var_to_expand *ves = NULL;
+ PTR *slot1;
+ PTR *slot2;
+ edge *edges = get_loop_exit_edges (loop, &n_edges);
+ basic_block preheader;
+ bool can_apply = false;
+
iv_analysis_loop_init (loop);
body = get_loop_body (loop);
+
+ if (flag_split_ivs_in_unroller)
+ opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
+ si_info_hash, si_info_eq, free);
+
+ /* Record the loop exit bb and loop preheader before the unrolling. */
+ if (!loop_preheader_edge (loop)->src)
+ {
+ preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
+ opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
+ }
+ else
+ opt_info->loop_preheader = loop_preheader_edge (loop)->src;
+
+ if (n_edges == 1
+ && !(edges[0]->flags & EDGE_COMPLEX)
+ && (edges[0]->flags & EDGE_LOOP_EXIT))
+ {
+ opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
+ can_apply = true;
+ }
+
+ if (flag_variable_expansion_in_unroller
+ && can_apply)
+ opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
+ ve_info_hash, ve_info_eq, free);
+
for (i = 0; i < loop->num_nodes; i++)
{
bb = body[i];
@@ -1479,33 +1687,44 @@ analyze_ivs_to_split (struct loop *loop)
continue;
FOR_BB_INSNS (bb, insn)
- {
- if (!INSN_P (insn))
- continue;
-
- ivts = analyze_iv_to_split_insn (insn);
-
- if (!ivts)
- continue;
-
- slot = htab_find_slot (si_info->insns_to_split, ivts, INSERT);
- *slot = ivts;
- }
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ if (opt_info->insns_to_split)
+ ivts = analyze_iv_to_split_insn (insn);
+
+ if (ivts)
+ {
+ slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
+ *slot1 = ivts;
+ continue;
+ }
+
+ if (opt_info->insns_with_var_to_expand)
+ ves = analyze_insn_to_expand_var (loop, insn);
+
+ if (ves)
+ {
+ slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
+ *slot2 = ves;
+ }
+ }
}
-
+
+ free (edges);
free (body);
-
- return si_info;
+ return opt_info;
}
/* Called just before loop duplication. Records start of duplicated area
- to SI_INFO. */
+ to OPT_INFO. */
static void
-si_info_start_duplication (struct split_ivs_info *si_info)
+opt_info_start_duplication (struct opt_info *opt_info)
{
- if (si_info)
- si_info->first_new_block = last_basic_block;
+ if (opt_info)
+ opt_info->first_new_block = last_basic_block;
}
/* Determine the number of iterations between initialization of the base
@@ -1641,121 +1860,314 @@ split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
delete_insn (insn);
}
-/* Splits induction variables (that are marked in SI_INFO) in copies of loop.
- I.e. replace
- i = i + 1;
- ...
- i = i + 1;
- ...
- i = i + 1;
- ...
+/* Return one expansion of the accumulator recoreded
+ in struct VE. */
- type chains by
+static rtx
+get_expansion (struct var_to_expand *ve)
+{
+ rtx reg;
+
+ if (ve->reuse_expansion == 0)
+ reg = ve->reg;
+ else
+ reg = VARRAY_RTX (ve->var_expansions, ve->reuse_expansion - 1);
+
+ if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == (unsigned) ve->reuse_expansion)
+ ve->reuse_expansion = 0;
+ else
+ ve->reuse_expansion++;
+
+ return reg;
+}
- i0 = i + 1
- ...
- i = i0 + 1
- ...
- i = i0 + 2
- ...
+/* Given INSN replace the uses of the accumulator recorded in VE
+ with a new register. */
+
+static void
+expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
+{
+ rtx new_reg, set;
+ bool really_new_expansion = false;
+
+ set = single_set (insn);
+ if (!set)
+ abort ();
+
+ /* Generate a new register only if the expansion limit has not been
+ reached. Else reuse an already existing expansion. */
+ if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
+ {
+ really_new_expansion = true;
+ new_reg = gen_reg_rtx (GET_MODE (ve->reg));
+ }
+ else
+ new_reg = get_expansion (ve);
+
+ validate_change (insn, &SET_DEST (set), new_reg, 1);
+ validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
+
+ if (apply_change_group ())
+ if (really_new_expansion)
+ {
+ VARRAY_PUSH_RTX (ve->var_expansions, new_reg);
+ ve->expansion_count++;
+ }
+}
+
+/* Initialize the variable expansions in loop preheader.
+ Callbacks for htab_traverse. PLACE_P is the loop-preheader
+ basic block where the initializtion of the expansions
+ should take place. */
+
+static int
+insert_var_expansion_initialization (void **slot, void *place_p)
+{
+ struct var_to_expand *ve = *slot;
+ basic_block place = (basic_block)place_p;
+ rtx seq, var, zero_init, insn;
+ unsigned i;
+
+ if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
+ return 1;
+
+ start_sequence ();
+ if (ve->op == PLUS || ve->op == MINUS)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
+ {
+ var = VARRAY_RTX (ve->var_expansions, i);
+ zero_init = CONST0_RTX (GET_MODE (var));
+ emit_move_insn (var, zero_init);
+ }
+ else if (ve->op == MULT)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
+ {
+ var = VARRAY_RTX (ve->var_expansions, i);
+ zero_init = CONST1_RTX (GET_MODE (var));
+ emit_move_insn (var, zero_init);
+ }
+
+ seq = get_insns ();
+ end_sequence ();
+
+ insn = BB_HEAD (place);
+ while (!NOTE_INSN_BASIC_BLOCK_P (insn))
+ insn = NEXT_INSN (insn);
+
+ emit_insn_after (seq, insn);
+ /* Continue traversing the hash table. */
+ return 1;
+}
+
+/* Combine the variable expansions at the loop exit.
+ Callbacks for htab_traverse. PLACE_P is the loop exit
+ basic block where the summation of the expansions should
+ take place. */
+
+static int
+combine_var_copies_in_loop_exit (void **slot, void *place_p)
+{
+ struct var_to_expand *ve = *slot;
+ basic_block place = (basic_block)place_p;
+ rtx sum = ve->reg;
+ rtx expr, seq, var, insn;
+ unsigned i;
+
+ if (VARRAY_ACTIVE_SIZE (ve->var_expansions) == 0)
+ return 1;
+
+ start_sequence ();
+ if (ve->op == PLUS || ve->op == MINUS)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
+ {
+ var = VARRAY_RTX (ve->var_expansions, i);
+ sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
+ var, sum);
+ }
+ else if (ve->op == MULT)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (ve->var_expansions); i++)
+ {
+ var = VARRAY_RTX (ve->var_expansions, i);
+ sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
+ var, sum);
+ }
+
+ expr = force_operand (sum, ve->reg);
+ if (expr != ve->reg)
+ emit_move_insn (ve->reg, expr);
+ seq = get_insns ();
+ end_sequence ();
+
+ insn = BB_HEAD (place);
+ while (!NOTE_INSN_BASIC_BLOCK_P (insn))
+ insn = NEXT_INSN (insn);
+
+ emit_insn_after (seq, insn);
+
+ /* Continue traversing the hash table. */
+ return 1;
+}
+
+/* Apply loop optimizations in loop copies using the
+ data which gathered during the unrolling. Structure
+ OPT_INFO record that data.
+
UNROLLING is true if we unrolled (not peeled) the loop.
REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
the loop (as it should happen in complete unrolling, but not in ordinary
peeling of the loop). */
static void
-split_ivs_in_copies (struct split_ivs_info *si_info, unsigned n_copies,
- bool unrolling, bool rewrite_original_loop)
+apply_opt_in_copies (struct opt_info *opt_info,
+ unsigned n_copies, bool unrolling,
+ bool rewrite_original_loop)
{
unsigned i, delta;
basic_block bb, orig_bb;
rtx insn, orig_insn, next;
struct iv_to_split ivts_templ, *ivts;
-
+ struct var_to_expand ve_templ, *ves;
+
/* Sanity check -- we need to put initialization in the original loop
body. */
gcc_assert (!unrolling || rewrite_original_loop);
-
+
/* Allocate the basic variables (i0). */
- htab_traverse (si_info->insns_to_split, allocate_basic_variable, NULL);
-
- for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
+ if (opt_info->insns_to_split)
+ htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
+
+ for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
{
bb = BASIC_BLOCK (i);
orig_bb = bb->rbi->original;
-
+
delta = determine_split_iv_delta (bb->rbi->copy_number, n_copies,
unrolling);
orig_insn = BB_HEAD (orig_bb);
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
- {
- next = NEXT_INSN (insn);
- if (!INSN_P (insn))
- continue;
-
- while (!INSN_P (orig_insn))
- orig_insn = NEXT_INSN (orig_insn);
-
- ivts_templ.insn = orig_insn;
- ivts = htab_find (si_info->insns_to_split, &ivts_templ);
- if (ivts)
- {
-
+ {
+ next = NEXT_INSN (insn);
+ if (!INSN_P (insn))
+ continue;
+
+ while (!INSN_P (orig_insn))
+ orig_insn = NEXT_INSN (orig_insn);
+
+ ivts_templ.insn = orig_insn;
+ ve_templ.insn = orig_insn;
+
+ /* Apply splitting iv optimization. */
+ if (opt_info->insns_to_split)
+ {
+ ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
+
+ if (ivts)
+ {
#ifdef ENABLE_CHECKING
- if (!rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)))
- abort ();
+ gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
#endif
-
- if (!delta)
- insert_base_initialization (ivts, insn);
- split_iv (ivts, insn, delta);
- }
- orig_insn = NEXT_INSN (orig_insn);
- }
+
+ if (!delta)
+ insert_base_initialization (ivts, insn);
+ split_iv (ivts, insn, delta);
+ }
+ }
+ /* Apply variable expansion optimization. */
+ if (unrolling && opt_info->insns_with_var_to_expand)
+ {
+ ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
+ if (ves)
+ {
+#ifdef ENABLE_CHECKING
+ gcc_assert (rtx_equal_p (PATTERN (insn), PATTERN (orig_insn)));
+#endif
+ expand_var_during_unrolling (ves, insn);
+ }
+ }
+ orig_insn = NEXT_INSN (orig_insn);
+ }
}
if (!rewrite_original_loop)
return;
-
+
+ /* Initialize the variable expansions in the loop preheader
+ and take care of combining them at the loop exit. */
+ if (opt_info->insns_with_var_to_expand)
+ {
+ htab_traverse (opt_info->insns_with_var_to_expand,
+ insert_var_expansion_initialization,
+ opt_info->loop_preheader);
+ htab_traverse (opt_info->insns_with_var_to_expand,
+ combine_var_copies_in_loop_exit,
+ opt_info->loop_exit);
+ }
+
/* Rewrite also the original loop body. Find them as originals of the blocks
in the last copied iteration, i.e. those that have
bb->rbi->original->copy == bb. */
- for (i = si_info->first_new_block; i < (unsigned) last_basic_block; i++)
+ for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
{
bb = BASIC_BLOCK (i);
orig_bb = bb->rbi->original;
if (orig_bb->rbi->copy != bb)
continue;
-
+
delta = determine_split_iv_delta (0, n_copies, unrolling);
for (orig_insn = BB_HEAD (orig_bb);
- orig_insn != NEXT_INSN (BB_END (bb));
- orig_insn = next)
- {
- next = NEXT_INSN (orig_insn);
-
- if (!INSN_P (orig_insn))
- continue;
+ orig_insn != NEXT_INSN (BB_END (bb));
+ orig_insn = next)
+ {
+ next = NEXT_INSN (orig_insn);
+
+ if (!INSN_P (orig_insn))
+ continue;
+
+ ivts_templ.insn = orig_insn;
+ if (opt_info->insns_to_split)
+ {
+ ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
+ if (ivts)
+ {
+ if (!delta)
+ insert_base_initialization (ivts, orig_insn);
+ split_iv (ivts, orig_insn, delta);
+ continue;
+ }
+ }
+
+ }
+ }
+}
- ivts_templ.insn = orig_insn;
- ivts = htab_find (si_info->insns_to_split, &ivts_templ);
- if (!ivts)
- continue;
+/* Release the data structures used for the variable expansion
+ optimization. Callbacks for htab_traverse. */
- if (!delta)
- insert_base_initialization (ivts, orig_insn);
- split_iv (ivts, orig_insn, delta);
- }
- }
+static int
+release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
+{
+ struct var_to_expand *ve = *slot;
+
+ VARRAY_CLEAR (ve->var_expansions);
+
+ /* Continue traversing the hash table. */
+ return 1;
}
-/* Release SI_INFO. */
+/* Release OPT_INFO. */
static void
-free_si_info (struct split_ivs_info *si_info)
+free_opt_info (struct opt_info *opt_info)
{
- htab_delete (si_info->insns_to_split);
- free (si_info);
+ if (opt_info->insns_to_split)
+ htab_delete (opt_info->insns_to_split);
+ if (opt_info->insns_with_var_to_expand)
+ {
+ htab_traverse (opt_info->insns_with_var_to_expand,
+ release_var_copies, NULL);
+ htab_delete (opt_info->insns_with_var_to_expand);
+ }
+ free (opt_info);
}