aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog28
-rw-r--r--gcc/config/spu/spu.md42
-rw-r--r--gcc/ddg.c10
-rw-r--r--gcc/loop-doloop.c82
-rw-r--r--gcc/modulo-sched.c148
-rw-r--r--gcc/testsuite/ChangeLog7
-rw-r--r--gcc/testsuite/gcc.dg/sms-1.c38
7 files changed, 293 insertions, 62 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 82c559d4c..b440e93 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2007-08-28 Mircea Namolaru <namolaru@il.ibm.com>
+ Vladimir Yanovsky <yanov@il.ibm.com>
+ Revital Eres <eres@il.ibm.com>
+ Andrey Belevantsev <abel@ispras.ru>
+
+ * config/spu/spu.md: Recognize doloop pattern when -fmodulo-sched
+ is set.
+ * modulo-sched.c: Add documentation regarding do-loop.
+ (doloop_register_get): Change number of arguments to support
+ the new do-loop pattern and check whether COUNT_REG has no other
+ occurences in the loop besides in the control part.
+ (duplicate_insns_of_cycles): Do not duplicate the insn which
+ changes count_reg as it is already adjusted.
+ (generate_prolog_epilog): New argument to support the new
+ do-loop pattern. Change the subtract instruction to use
+ expand_simple_binop. Call duplicate_insns_of_cycles with new
+ argument.
+ (sms_schedule): Call doloop_register_get and
+ generate_prolog_epilog with new argument. Do not handle loops
+ with single sets insns with subreg in their lhs.
+ * loop-doloop.c (doloop_optimize): Support for another do-loop
+ pattern.
+ (doloop_condition_get): Gets an instruction instead of a pattern
+ and change the return condition when the do-loop pattern is
+ not parallel.
+ * ddg.c (create_ddg_dep_from_intra_loop_link): Handle only reg
+ deps when considering to not create edges.
+
2007-08-27 Alexandre Oliva <aoliva@redhat.com>
* doc/extend.texi (gnu_inline funtion attribute): Document C++
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 5dcc45e..1afdd11 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -3887,6 +3887,48 @@ selb\t%0,%4,%0,%3"
[(set_attr "type" "br")])
+
+ ;; Define the subtract-one-and-jump insns so loop.c
+ ;; knows what to generate.
+ (define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+ "
+ {
+ /* Currently SMS relies on the do-loop pattern to recognize loops
+ where (1) the control part comprises of all insns defining and/or
+ using a certain 'count' register and (2) the loop count can be
+ adjusted by modifying this register prior to the loop.
+. ??? The possible introduction of a new block to initialize the
+ new IV can potentially effects branch optimizations. */
+ if (optimize > 0 && flag_modulo_sched)
+ {
+ rtx s0;
+ rtx bcomp;
+ rtx loc_ref;
+
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+
+ s0 = operands [0];
+ emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
+ bcomp = gen_rtx_NE(SImode, s0, const0_rtx);
+ loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
+ loc_ref, pc_rtx)));
+
+ DONE;
+ }
+ }")
+
;; convert between any two modes, avoiding any GCC assumptions
(define_expand "spu_convert"
[(set (match_operand 0 "spu_reg_operand" "")
diff --git a/gcc/ddg.c b/gcc/ddg.c
index 295811d..f1ec8fd 100644
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -176,13 +176,17 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
rtx set;
set = single_set (dest_node->insn);
- if (set)
+ /* TODO: Handle registers that REG_P is not true for them, i.e.
+ subregs and special registers. */
+ if (set && REG_P (SET_DEST (set)))
{
int regno = REGNO (SET_DEST (set));
- struct df_ref *first_def =
- df_bb_regno_first_def_find (g->bb, regno);
+ struct df_ref *first_def;
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
+ first_def = df_bb_regno_first_def_find (g->bb, regno);
+ gcc_assert (first_def);
+
if (bitmap_bit_p (bb_info->gen, first_def->id))
return;
}
diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c
index 162ef4a..6e33a4f 100644
--- a/gcc/loop-doloop.c
+++ b/gcc/loop-doloop.c
@@ -69,35 +69,59 @@ along with GCC; see the file COPYING3. If not see
if it is not a decrement and branch jump insn. */
rtx
-doloop_condition_get (rtx pattern)
+doloop_condition_get (rtx doloop_pat)
{
rtx cmp;
rtx inc;
rtx reg;
rtx inc_src;
rtx condition;
+ rtx pattern;
- /* The canonical doloop pattern we expect is:
+ /* The canonical doloop pattern we expect has one of the following
+ forms:
- (parallel [(set (pc) (if_then_else (condition)
- (label_ref (label))
- (pc)))
- (set (reg) (plus (reg) (const_int -1)))
- (additional clobbers and uses)])
+ 1) (parallel [(set (pc) (if_then_else (condition)
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
- Some targets (IA-64) wrap the set of the loop counter in
- an if_then_else too.
+ The branch must be the first entry of the parallel (also required
+ by jump.c), and the second entry of the parallel must be a set of
+ the loop counter register. Some targets (IA-64) wrap the set of
+ the loop counter in an if_then_else too.
- In summary, the branch must be the first entry of the
- parallel (also required by jump.c), and the second
- entry of the parallel must be a set of the loop counter
- register. */
+ 2) (set (reg) (plus (reg) (const_int -1))
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))). */
+
+ pattern = PATTERN (doloop_pat);
if (GET_CODE (pattern) != PARALLEL)
- return 0;
+ {
+ rtx cond;
+
+ /* We expect the decrement to immediately precede the branch. */
- cmp = XVECEXP (pattern, 0, 0);
- inc = XVECEXP (pattern, 0, 1);
+ if ((PREV_INSN (doloop_pat) == NULL_RTX)
+ || !INSN_P (PREV_INSN (doloop_pat)))
+ return 0;
+
+ cmp = pattern;
+ inc = PATTERN (PREV_INSN (doloop_pat));
+ /* We expect the condition to be of the form (reg != 0) */
+ cond = XEXP (SET_SRC (cmp), 0);
+ if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
+ return 0;
+
+ }
+ else
+ {
+ cmp = XVECEXP (pattern, 0, 0);
+ inc = XVECEXP (pattern, 0, 1);
+ }
/* Check for (set (reg) (something)). */
if (GET_CODE (inc) != SET)
@@ -139,7 +163,29 @@ doloop_condition_get (rtx pattern)
if ((XEXP (condition, 0) == reg)
|| (GET_CODE (XEXP (condition, 0)) == PLUS
&& XEXP (XEXP (condition, 0), 0) == reg))
+ {
+ if (GET_CODE (pattern) != PARALLEL)
+ /* The second form we expect:
+
+ (set (reg) (plus (reg) (const_int -1))
+ (set (pc) (if_then_else (reg != 0)
+ (label_ref (label))
+ (pc))).
+
+ is equivalent to the following:
+
+ (parallel [(set (pc) (if_then_else (reg != 1)
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
+
+ So we return that form instead.
+ */
+ condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
+
return condition;
+ }
/* ??? If a machine uses a funny comparison, we could return a
canonicalized form here. */
@@ -597,9 +643,7 @@ doloop_optimize (struct loop *loop)
{
while (NEXT_INSN (doloop_pat) != NULL_RTX)
doloop_pat = NEXT_INSN (doloop_pat);
- if (JUMP_P (doloop_pat))
- doloop_pat = PATTERN (doloop_pat);
- else
+ if (!JUMP_P (doloop_pat))
doloop_pat = NULL_RTX;
}
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 73c4adc..7e9f6aa 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -82,8 +82,21 @@ along with GCC; see the file COPYING3. If not see
perform modulo variable expansion for live ranges that span more than
II cycles (i.e. use register copies to prevent a def from overwriting
itself before reaching the use).
-*/
+ SMS works with countable loops (1) whose control part can be easily
+ decoupled from the rest of the loop and (2) whose loop count can
+ be easily adjusted. This is because we peel a constant number of
+ iterations into a prologue and epilogue for which we want to avoid
+ emitting the control part, and a kernel which is to iterate that
+ constant number of iterations less than the original loop. So the
+ control part should be a set of insns clearly identified and having
+ its own iv, not otherwise used in the loop (at-least for now), which
+ initializes a register before the loop to the number of iterations.
+ Currently SMS relies on the do-loop pattern to recognize such loops,
+ where (1) the control part comprises of all insns defining and/or
+ using a certain 'count' register and (2) the loop count can be
+ adjusted by modifying this register prior to the loop.
+ TODO: Rely on cfgloop analysis instead. */
/* This page defines partial-schedule structures and functions for
modulo scheduling. */
@@ -182,10 +195,11 @@ static int sms_order_nodes (ddg_ptr, int, int * result);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
-static void generate_prolog_epilog (partial_schedule_ptr ,struct loop * loop, rtx);
+static void generate_prolog_epilog (partial_schedule_ptr, struct loop *loop,
+ rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
int from_stage, int to_stage,
- int is_prolog);
+ int is_prolog, rtx count_reg);
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
@@ -261,20 +275,22 @@ static struct sched_info sms_sched_info =
};
-/* Return the register decremented and tested in INSN,
- or zero if it is not a decrement-and-branch insn. */
-
+/* Given HEAD and TAIL which are the first and last insns in a loop;
+ return the register which controls the loop. Return zero if it has
+ more than one occurrence in the loop besides the control part or the
+ do-loop pattern is not of the form we expect. */
static rtx
-doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
+doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
- rtx pattern, reg, condition;
+ rtx reg, condition, insn;
+ bool found = false;
- if (! JUMP_P (insn))
+ if (!JUMP_P (tail))
return NULL_RTX;
- pattern = PATTERN (insn);
- condition = doloop_condition_get (pattern);
+ /* TODO: Free SMS's dependence on doloop_condition_get. */
+ condition = doloop_condition_get (tail);
if (! condition)
return NULL_RTX;
@@ -286,6 +302,29 @@ doloop_register_get (rtx insn ATTRIBUTE_UNUSED)
else
gcc_unreachable ();
+ /* Check that the COUNT_REG has no other occurrences in the loop
+ until the decrement. We assume the control part consists of
+ either a single (parallel) branch-on-count or a (non-parallel)
+ branch immediately preceded by a single (decrement) insn. */
+ for (insn = head; insn != PREV_INSN (tail); insn = NEXT_INSN (insn))
+ if ((found = reg_mentioned_p (reg, insn)) == true)
+ break;
+ if (found)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS count_reg found outside control\n");
+
+ return NULL_RTX;
+ }
+ /* One last check in case the do-loop pattern is parallel. */
+ if (GET_CODE (PATTERN (tail)) == PARALLEL)
+ if (reg_mentioned_p (reg, PREV_INSN (tail)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS count_reg found outside control\n");
+
+ return NULL_RTX;
+ }
return reg;
#else
return NULL_RTX;
@@ -583,7 +622,7 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last)
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
- int to_stage, int for_prolog)
+ int to_stage, int for_prolog, rtx count_reg)
{
int row;
ps_insn_ptr ps_ij;
@@ -595,6 +634,13 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int j, i_reg_moves;
rtx reg_move = NULL_RTX;
+ /* Do not duplicate any insn which refers to count_reg as it
+ belongs to the control part.
+ TODO: This should be done by analyzing the control part of
+ the loop. */
+ if (reg_mentioned_p (count_reg, u_node->insn))
+ continue;
+
if (for_prolog)
{
/* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
@@ -643,23 +689,35 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
/* Generate the instructions (including reg_moves) for prolog & epilog. */
static void
-generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_reg)
+generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
+ rtx count_reg, rtx count_init)
{
int i;
int last_stage = PS_STAGE_COUNT (ps) - 1;
edge e;
-
+
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
- if (count_reg)
- /* Generate a subtract instruction at the beginning of the prolog to
- adjust the loop count by STAGE_COUNT. */
- emit_insn (gen_sub2_insn (count_reg, GEN_INT (last_stage)));
+ if (!count_init)
+ {
+ /* Generate instructions at the beginning of the prolog to
+ adjust the loop count by STAGE_COUNT. If loop count is constant
+ (count_init), this constant is adjusted by STAGE_COUNT in
+ generate_prolog_epilog function. */
+ rtx sub_reg = NULL_RTX;
+
+ sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
+ count_reg, GEN_INT (last_stage),
+ count_reg, 1, OPTAB_DIRECT);
+ gcc_assert (REG_P (sub_reg));
+ if (REGNO (sub_reg) != REGNO (count_reg))
+ emit_move_insn (count_reg, sub_reg);
+ }
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, 0, i, 1);
-
+ duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);
+
/* Put the prolog on the entry edge. */
e = loop_preheader_edge (loop);
split_edge_and_insert (e, get_insns ());
@@ -670,8 +728,8 @@ generate_prolog_epilog (partial_schedule_ptr ps, struct loop * loop, rtx count_r
start_sequence ();
for (i = 0; i < last_stage; i++)
- duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
-
+ duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);
+
/* Put the epilogue on the exit edge. */
gcc_assert (single_exit (loop));
e = single_exit (loop);
@@ -907,20 +965,27 @@ sms_schedule (void)
}
/* Make sure this is a doloop. */
- if ( !(count_reg = doloop_register_get (tail)))
+ if ( !(count_reg = doloop_register_get (head, tail)))
continue;
/* Don't handle BBs with calls or barriers, or !single_set insns,
or auto-increment insns (to avoid creating invalid reg-moves
for the auto-increment insns).
- ??? Should handle auto-increment insns. */
- for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
- if (CALL_P (insn)
- || BARRIER_P (insn)
- || (INSN_P (insn) && !JUMP_P (insn)
- && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
- || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0))
- break;
+ ??? Should handle auto-increment insns.
+ ??? Should handle insns defining subregs. */
+ for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
+ {
+ rtx set;
+
+ if (CALL_P (insn)
+ || BARRIER_P (insn)
+ || (INSN_P (insn) && !JUMP_P (insn)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
+ || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
+ || (INSN_P (insn) && (set = single_set (insn))
+ && GET_CODE (SET_DEST (set)) == SUBREG))
+ break;
+ }
if (insn != NEXT_INSN (tail))
{
@@ -932,8 +997,11 @@ sms_schedule (void)
fprintf (dump_file, "SMS loop-with-barrier\n");
else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
fprintf (dump_file, "SMS reg inc\n");
- else
- fprintf (dump_file, "SMS loop-with-not-single-set\n");
+ else if ((INSN_P (insn) && !JUMP_P (insn)
+ && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
+ fprintf (dump_file, "SMS loop-with-not-single-set\n");
+ else
+ fprintf (dump_file, "SMS loop with subreg in lhs\n");
print_rtl_single (dump_file, insn);
}
@@ -998,7 +1066,7 @@ sms_schedule (void)
/* In case of th loop have doloop register it gets special
handling. */
count_init = NULL_RTX;
- if ((count_reg = doloop_register_get (tail)))
+ if ((count_reg = doloop_register_get (head, tail)))
{
basic_block pre_header;
@@ -1072,7 +1140,10 @@ sms_schedule (void)
the closing_branch was scheduled and should appear in the last (ii-1)
row. Otherwise, we are free to schedule the branch, and we let nodes
that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
- row; this should reduce stage_count to minimum. */
+ row; this should reduce stage_count to minimum.
+ TODO: Revisit the issue of scheduling the insns of the
+ control part relative to the branch when the control part
+ has more than one insn. */
normalize_sched_times (ps);
rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
set_columns_for_ps (ps);
@@ -1111,11 +1182,8 @@ sms_schedule (void)
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes, g);
/* Generate prolog and epilog. */
- if (count_reg && !count_init)
- generate_prolog_epilog (ps, loop, count_reg);
- else
- generate_prolog_epilog (ps, loop, NULL_RTX);
-
+ generate_prolog_epilog (ps, loop, count_reg, count_init);
+
free_undo_replace_buff (reg_move_replaces);
}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7839eaa..690128f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2007-08-28 Mircea Namolaru <namolaru@il.ibm.com>
+ Vladimir Yanovsky <yanov@il.ibm.com>
+ Revital Eres <eres@il.ibm.com>
+ Andrey Belevantsev <abel@ispras.ru>
+
+ * gcc.dg/sms-1.c: New test.
+
2007-08-27 Alexandre Oliva <aoliva@redhat.com>
* g++.dg/ext/gnu-inline-common.h: New.
diff --git a/gcc/testsuite/gcc.dg/sms-1.c b/gcc/testsuite/gcc.dg/sms-1.c
new file mode 100644
index 0000000..d915ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sms-1.c
@@ -0,0 +1,38 @@
+/* The same test as loop-3c.c. It failed on ia64
+ due to not handling of subreg in the lhs that is fixed. */
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -w" } */
+
+
+#include <limits.h>
+
+void * a[255];
+
+f (m)
+{
+ int i;
+ int sh = 0x100;
+ i = m;
+ do
+ {
+ a[sh >>= 1] = ((unsigned)i << 3) + (char*)a;
+ i += 4;
+ }
+ while (i < INT_MAX/2 + 1 + 4 * 4);
+}
+
+main ()
+{
+ a[0x10] = 0;
+ a[0x08] = 0;
+ f (INT_MAX/2 + INT_MAX/4 + 2);
+ if (a[0x10] || a[0x08])
+ abort ();
+ a[0x10] = 0;
+ a[0x08] = 0;
+ f (INT_MAX/2 + 1);
+ if (! a[0x10] || a[0x08])
+ abort ();
+ exit (0);
+}
+