author | Jan Hubicka <jh@suse.cz> | 2009-05-11 22:54:56 +0200
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2009-05-11 20:54:56 +0000
commit | 08f1af2ed022e03c212779751c83ec8e2573a450 (patch)
tree | 80a513da7ccd914d65c08f77342649107a50930e
parent | a7d39bd39c8f3ad07e6688afe4b55d7d75ea5080 (diff)
pr21829.c: Simplify matching since we now optimize better.
* gcc.dg/tree-ssa/pr21829.c: Simplify matching since
we now optimize better.
* gcc.dg/Wunreachable-8.c: Bogus warnings now come
out at different places.
* gcc.dg/vect/vect-92.c: Increase loop iteration count to prevent
unrolling.
* gcc.dg/vect/vect-76.c: Likewise.
* gcc.dg/vect/vect-70.c: Likewise.
* gcc.dg/vect/vect-66.c: Likewise.
* gcc.dg/vect/no-section-anchors-vect-66.c: Likewise.
* gcc.dg/vect/slp-3.c: One of the loops now gets fully unrolled.
* tree-ssa-loop-ivcanon.c: Include target.h.
(struct loop_size): New structure.
(constant_after_peeling): New predicate.
(tree_estimate_loop_size): New function.
(estimated_unrolled_size): Rewrite for new estimates.
(try_unroll_loop_completely): Use new estimates.
* Makefile.in (tree-ssa-loop-ivcanon.o): Add dependency on target.h.
From-SVN: r147395
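The entries above replace the old flat "reduce by a third" size guess with per-statement bookkeeping: tree_estimate_loop_size records how much of the loop body each peeled copy is expected to keep, and estimated_unrolled_size turns those numbers into the size bound that try_unroll_loop_completely checks. The sketch below is illustrative only, not GCC code: it mirrors the struct loop_size fields and the arithmetic from the patch, with made-up example numbers.

```c
#include <stdio.h>

/* Field names mirror struct loop_size from the patch; values are hypothetical.  */
struct loop_size
{
  int overall;                              /* insns in one copy of the loop body */
  int eliminated_by_peeling;                /* insns expected to fold away per copy */
  int last_iteration;                       /* insns executed in the final copy */
  int last_iteration_eliminated_by_peeling; /* of those, expected to fold away */
};

/* Same arithmetic as the new estimated_unrolled_size: charge NUNROLL full
   copies minus what peeling folds away, add the cheaper final copy, then
   scale by 2/3 for further simplification the estimator cannot see.  */
static long
estimated_unrolled_size (const struct loop_size *size, unsigned long nunroll)
{
  long unr_insns = (long) nunroll * (size->overall - size->eliminated_by_peeling);
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
  unr_insns = unr_insns * 2 / 3;
  if (unr_insns <= 0)
    unr_insns = 1;
  return unr_insns;
}

int
main (void)
{
  /* Hypothetical 10-insn body where 4 insns (IV update + exit test) fold away.  */
  struct loop_size size = { 10, 4, 7, 4 };
  printf ("estimated size after 7 extra copies: %ld insns\n",
          estimated_unrolled_size (&size, 7));   /* prints 30 */
  return 0;
}
```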
-rw-r--r-- | gcc/ChangeLog | 10
-rw-r--r-- | gcc/Makefile.in | 2
-rw-r--r-- | gcc/testsuite/ChangeLog | 14
-rw-r--r-- | gcc/testsuite/gcc.dg/Wunreachable-2.c | 4
-rw-r--r-- | gcc/testsuite/gcc.dg/Wunreachable-8.c | 2
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr21829.c | 32
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c | 2
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/slp-3.c | 5
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-66.c | 2
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-70.c | 2
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-76.c | 2
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-92.c | 8
-rw-r--r-- | gcc/tree-ssa-loop-ivcanon.c | 204
13 files changed, 229 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b696848..809ff47 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2009-05-11  Jan Hubicka  <jh@suse.cz>
+
+	* tree-ssa-loop-ivcanon.c: Include target.h
+	(struct loop_size): new structure.
+	(constant_after_peeling): New predicate.
+	(tree_estimate_loop_size): New function.
+	(estimated_unrolled_size): Rewrite for new estimates.
+	(try_unroll_loop_completely): Use new estimates.
+	* Makefile.in (tree-ssa-loop-ivcanon.o): Add dependenc on target.h
+
 2009-05-11  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
 	* config/spu/spu-c.c (spu_categorize_keyword): Update for recent
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9a4d62b..ab69028 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2268,7 +2268,7 @@ tree-ssa-loop-ivcanon.o : tree-ssa-loop-ivcanon.c $(TREE_FLOW_H) $(CONFIG_H) \
 	$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) \
 	$(TREE_INLINE_H) output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
 	$(FLAGS_H) $(TREE_PASS_H) $(SCEV_H) $(BASIC_BLOCK_H) $(GGC_H) \
-	hard-reg-set.h tree-chrec.h
+	hard-reg-set.h tree-chrec.h $(TARGET_H)
 tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
 	$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
 	output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a6e67a5..f0f45fc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,17 @@
+2009-05-11  Jan Hubicka  <jh@suse.cz>
+
+	* gcc.dg/tree-ssa/pr21829.c: Simplify matching since
+	we now optimize better.
+	* gcc.dg/Wunreachable-8.c: Bogus warnings now come
+	out at different places.
+	* gcc.dg/vect/vect-92.c: Increase loop iteration count to prevent
+	unroling.
+	* gcc.dg/vect/vect-76.c: Likewise.
+	* gcc.dg/vect/vect-70.c: Likewise.
+	* gcc.dg/vect/vect-66.c: Likewise.
+	* gcc.dg/vect/no-section-anchors-vect-66.c: Likewise.
+	* gcc.dg/vect/slp-3.c: One of loops gets now fully unrolled.
+
 2009-05-11  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR middle-end/40080
diff --git a/gcc/testsuite/gcc.dg/Wunreachable-2.c b/gcc/testsuite/gcc.dg/Wunreachable-2.c
index 8242441..55a8f9c 100644
--- a/gcc/testsuite/gcc.dg/Wunreachable-2.c
+++ b/gcc/testsuite/gcc.dg/Wunreachable-2.c
@@ -9,8 +9,8 @@ void bar (void)
 {
   int i;
 
-  for (i = 0; i < 2; i++)
-    if (! foo (a[i]))
+  for (i = 0; i < 2; i++) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
+    if (! foo (a[i])) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
       return;
   baz (); /* { dg-bogus "will never be executed" } */
diff --git a/gcc/testsuite/gcc.dg/Wunreachable-8.c b/gcc/testsuite/gcc.dg/Wunreachable-8.c
index 81254ba..1a13d64 100644
--- a/gcc/testsuite/gcc.dg/Wunreachable-8.c
+++ b/gcc/testsuite/gcc.dg/Wunreachable-8.c
@@ -4,7 +4,7 @@ float Factorial(float X)
 {
   float val = 1.0;
   int k,j;
-  for (k=1; k < 5; k++)
+  for (k=1; k < 5; k++) /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
     {
       val += 1.0; /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
     }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
index 6b5c4bb..c95714a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21829.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-cddce2" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
 
 int test(int v)
 {
@@ -16,33 +16,7 @@ int test(int v)
   return x;
 }
 
-/* This should be optimized to
+/* This should be unrolled and optimized into conditional set of return value "v < 0".  */
 
-   if (v <= 0) goto <L1>; else goto <L3>;
-
- <L1>:;
-
-   # x_1 = PHI <0(3), 1(1)>;
- <L3>:;
-   return x_1;
-
-   retaining only a single conditional.  This doesn't work as nobody
-   combines the two tests
-
-   if (v < 0) goto <bb 4>; else goto <bb 3>;
-
- <bb 3>:
-
-   if (v <= 0) goto <bb 4>; else goto <bb 5>;
-
-   this late in the game.  tree-ssa-ifcombine.c would do it if we would
-   unroll the loop during early loop unrolling though.
-
-   For now vrp2 does all the needed folding and threading and cddce2
-   provides a nice IL to scan.  */
-
-/* { dg-final { scan-tree-dump-times "if " 1 "optimized" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "if " 2 "cddce2" } } */
-/* { dg-final { scan-tree-dump "x_. = PHI <0\\\(.\\\), 1\\\(.\\\)>" "cddce2" } } */
-/* { dg-final { cleanup-tree-dump "cddce2" } } */
+/* { dg-final { scan-tree-dump-not "if \\(" "optimized" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
index d590975..49a9098f 100644
--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
@@ -3,7 +3,7 @@
 #include <stdarg.h>
 #include "tree-vect.h"
 
-#define N 8
+#define N 16
 
 int ia[8][5][N+2];
 int ic[16][16][5][N+2];
diff --git a/gcc/testsuite/gcc.dg/vect/slp-3.c b/gcc/testsuite/gcc.dg/vect/slp-3.c
index 0707153..1bb9884 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-3.c
@@ -142,7 +142,8 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail vect_no_align } } } */
+/* One of the loops gets complettely unrolled.  */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_no_align } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-66.c b/gcc/testsuite/gcc.dg/vect/vect-66.c
index a332fa0..e0b23cd 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-66.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-66.c
@@ -3,7 +3,7 @@
 #include <stdarg.h>
 #include "tree-vect.h"
 
-#define N 8
+#define N 16
 
 __attribute__ ((noinline))
 void main1 ()
diff --git a/gcc/testsuite/gcc.dg/vect/vect-70.c b/gcc/testsuite/gcc.dg/vect/vect-70.c
index df7de31..23b1902 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-70.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-70.c
@@ -3,7 +3,7 @@
 #include <stdarg.h>
 #include "tree-vect.h"
 
-#define N 12
+#define N 24
 
 struct s{
   int m;
diff --git a/gcc/testsuite/gcc.dg/vect/vect-76.c b/gcc/testsuite/gcc.dg/vect/vect-76.c
index 847b5e5..7097e7a 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-76.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-76.c
@@ -3,7 +3,7 @@
 #include <stdarg.h>
 #include "tree-vect.h"
 
-#define N 12
+#define N 24
 #define OFF 4
 
 /* Check handling of accesses for which the "initial condition" -
diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c
index 01c751f..3a64e25 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-92.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-92.c
@@ -22,13 +22,13 @@ main1 ()
 {
   int i;
 
-  for (i = 0; i < 5; i++)
+  for (i = 0; i < 10; i++)
     {
       pa[i+1] = pb[i+1] * pc[i+1];
     }
 
   /* check results: */
-  for (i = 0; i < 5; i++)
+  for (i = 0; i < 10; i++)
     {
       if (pa[i+1] != (pb[i+1] * pc[i+1]))
 	abort ();
@@ -42,13 +42,13 @@ main2 ()
 {
   int i;
 
-  for (i = 0; i < 6; i++)
+  for (i = 0; i < 12; i++)
     {
       pa[i+1] = pb[i+1] * pc[i+1];
     }
 
   /* check results: */
-  for (i = 0; i < 6; i++)
+  for (i = 0; i < 12; i++)
     {
       if (pa[i+1] != (pb[i+1] * pc[i+1]))
 	abort ();
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 2101073..8e45bbb 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "flags.h"
 #include "tree-inline.h"
+#include "target.h"
 
 /* Specifies types of loops that may be unrolled.  */
 
@@ -118,7 +119,7 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights)
 {
   basic_block *body = get_loop_body (loop);
   gimple_stmt_iterator gsi;
-  unsigned size = 1, i;
+  unsigned size = 0, i;
 
   for (i = 0; i < loop->num_nodes; i++)
     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -128,28 +129,195 @@ tree_num_loop_insns (struct loop *loop, eni_weights *weights)
   return size;
 }
 
-/* Estimate number of insns of completely unrolled loop.  We assume
-   that the size of the unrolled loop is decreased in the
-   following way (the numbers of insns are based on what
-   estimate_num_insns returns for appropriate statements):
+/* Describe size of loop as detected by tree_estimate_loop_size.  */
+struct loop_size
+{
+  /* Number of instructions in the loop.  */
+  int overall;
+
+  /* Number of instructions that will be likely optimized out in
+     peeled iterations of loop  (i.e. computation based on induction
+     variable where induction variable starts at known constant.)  */
+  int eliminated_by_peeling;
+
+  /* Same statistics for last iteration of loop: it is smaller because
+     instructions after exit are not executed.  */
+  int last_iteration;
+  int last_iteration_eliminated_by_peeling;
+};
+
+/* Return true if OP in STMT will be constant after peeling LOOP.  */
+
+static bool
+constant_after_peeling (tree op, gimple stmt, struct loop *loop)
+{
+  affine_iv iv;
+
+  if (is_gimple_min_invariant (op))
+    return true;
+
+  /* We can still fold accesses to constant arrays when index is known.  */
+  if (TREE_CODE (op) != SSA_NAME)
+    {
+      tree base = op;
+
+      /* First make fast look if we see constant array inside.  */
+      while (handled_component_p (base))
+	base = TREE_OPERAND (base, 0);
+      if ((DECL_P (base)
+	   && TREE_STATIC (base)
+	   && TREE_READONLY (base)
+	   && (DECL_INITIAL (base)
+	       || (!DECL_EXTERNAL (base)
+		   && targetm.binds_local_p (base))))
+	  || CONSTANT_CLASS_P (base))
+	{
+	  /* If so, see if we understand all the indices.  */
+	  base = op;
+	  while (handled_component_p (base))
+	    {
+	      if (TREE_CODE (base) == ARRAY_REF
+		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
+		return false;
+	      base = TREE_OPERAND (base, 0);
+	    }
+	  return true;
+	}
+      return false;
+    }
+
+  /* Induction variables are constants.  */
+  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
+    return false;
+  if (!is_gimple_min_invariant (iv.base))
+    return false;
+  if (!is_gimple_min_invariant (iv.step))
+    return false;
+  return true;
+}
+
+/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
+   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
+
+static void
+tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
+{
+  basic_block *body = get_loop_body (loop);
+  gimple_stmt_iterator gsi;
+  unsigned int i;
+  bool after_exit;
+
+  size->overall = 0;
+  size->eliminated_by_peeling = 0;
+  size->last_iteration = 0;
+  size->last_iteration_eliminated_by_peeling = 0;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
+  for (i = 0; i < loop->num_nodes; i++)
+    {
+      if (exit && body[i] != exit->src
+	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
+	after_exit = true;
+      else
+	after_exit = false;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
+
+      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple stmt = gsi_stmt (gsi);
+	  int num = estimate_num_insns (stmt, &eni_size_weights);
+	  bool likely_eliminated = false;
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "  size: %3i ", num);
+	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
+	    }
+
+	  /* Look for reasons why we might optimize this stmt away. */
+
+	  /* Exit conditional.  */
+	  if (body[i] == exit->src && stmt == last_stmt (exit->src))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "   Exit condition will be eliminated.\n");
+	      likely_eliminated = true;
+	    }
+	  /* Sets of IV variables  */
+	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
+	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "   Induction variable computation will"
+			 " be folded away.\n");
+	      likely_eliminated = true;
+	    }
+	  /* Assignments of IV variables.  */
+	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
+		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
+		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
+		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
+						  stmt, loop)))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "   Constant expression will be folded away.\n");
+	      likely_eliminated = true;
+	    }
+	  /* Conditionals.  */
+	  else if (gimple_code (stmt) == GIMPLE_COND
+		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
+		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "   Constant conditional.\n");
+	      likely_eliminated = true;
+	    }
+
+	  size->overall += num;
+	  if (likely_eliminated)
+	    size->eliminated_by_peeling += num;
+	  if (!after_exit)
+	    {
+	      size->last_iteration += num;
+	      if (likely_eliminated)
+		size->last_iteration_eliminated_by_peeling += num;
+	    }
+	}
+    }
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
+	     size->eliminated_by_peeling, size->last_iteration,
+	     size->last_iteration_eliminated_by_peeling);
+
+  free (body);
+}
 
-   1) exit condition gets removed (2 insns)
-   2) increment of the control variable gets removed (2 insns)
-   3) All remaining statements are likely to get simplified
-      due to constant propagation.  Hard to estimate; just
-      as a heuristics we decrease the rest by 1/3.
+/* Estimate number of insns of completely unrolled loop.
+   It is (NUNROLL + 1) * size of loop body with taking into account
+   the fact that in last copy everything after exit conditional
+   is dead and that some instructions will be eliminated after
+   peeling.
 
-   NINSNS is the number of insns in the loop before unrolling.
-   NUNROLL is the number of times the loop is unrolled.  */
+   Loop body is likely going to simplify futher, this is difficult
+   to guess, we just decrease the result by 1/3.  */
 
 static unsigned HOST_WIDE_INT
-estimated_unrolled_size (unsigned HOST_WIDE_INT ninsns,
+estimated_unrolled_size (struct loop_size *size,
 			 unsigned HOST_WIDE_INT nunroll)
 {
-  HOST_WIDE_INT unr_insns = 2 * ((HOST_WIDE_INT) ninsns - 4) / 3;
+  HOST_WIDE_INT unr_insns = ((nunroll)
+			     * (HOST_WIDE_INT) (size->overall
+						- size->eliminated_by_peeling));
+  if (!nunroll)
+    unr_insns = 0;
+  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
+
+  unr_insns = unr_insns * 2 / 3;
   if (unr_insns <= 0)
     unr_insns = 1;
-  unr_insns *= (nunroll + 1);
 
   return unr_insns;
 }
@@ -165,6 +333,7 @@ try_unroll_loop_completely (struct loop *loop,
 {
   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
   gimple cond;
+  struct loop_size size;
 
   if (loop->inner)
     return false;
@@ -182,9 +351,10 @@ try_unroll_loop_completely (struct loop *loop,
   if (ul == UL_SINGLE_ITER)
     return false;
 
-  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+  tree_estimate_loop_size (loop, exit, &size);
+  ninsns = size.overall;
 
-  unr_insns = estimated_unrolled_size (ninsns, n_unroll);
+  unr_insns = estimated_unrolled_size (&size, n_unroll);
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
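As a closing illustration (not part of the commit), this is the kind of loop the new constant_after_peeling predicate is aimed at: once the loop is completely peeled, the induction variable is a known constant in every copy, so the exit test, the induction-variable update, and reads from a read-only initialized array can all fold away, which is exactly what eliminated_by_peeling accounts for. The function and array names below are hypothetical.

```c
#include <stdio.h>

/* Read-only, statically initialized array: the DECL_INITIAL / TREE_READONLY
   case that constant_after_peeling can look through.  */
static const int coef[4] = { 1, 2, 3, 4 };

static int
dot4 (const int *x)
{
  int i, sum = 0;
  /* The exit test and the uses of i are induction-variable based, so they are
     constant in each peeled copy; coef[i] becomes a load from a constant
     array with a known index and folds to a literal.  */
  for (i = 0; i < 4; i++)
    sum += coef[i] * x[i];
  return sum;
}

int
main (void)
{
  int x[4] = { 5, 6, 7, 8 };
  /* After complete unrolling this reduces to 1*5 + 2*6 + 3*7 + 4*8 = 70.  */
  printf ("%d\n", dot4 (x));
  return 0;
}
```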