diff options
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c | 14 | ||||
-rw-r--r-- | gcc/tree-loop-distribution.c | 198 |
4 files changed, 53 insertions, 180 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c57d081..26b06d7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2013-09-12 Richard Biener <rguenther@suse.de> + + * tree-loop-distribution.c (dot_rdg_1): Make graph prettier. + (dot_rdg): Use popen instead of system in optional code. + (remaining_stmts, upstream_mem_writes): Remove global bitmaps. + (already_processed_vertex_p): Adjust. + (has_anti_or_output_dependence, predecessor_has_mem_write, + mark_nodes_having_upstream_mem_writes, has_upstream_mem_writes, + rdg_flag_uses): Remove. + (rdg_flag_vertex): Simplify. + (rdg_flag_vertex_and_dependent): Rely on a correct RDG and + remove recursion. + (build_rdg_partition_for_component): Process the first vertex + of a component only. + (ldist_gen): Do not compute remaining_stmts or upstream_mem_writes. + 2013-09-12 Alan Modra <amodra@gmail.com> * config/rs6000/rs6000.c (toc_relative_expr_p): Use add_cint_operand. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 49e28ad..da3f08d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-09-12 Richard Biener <rguenther@suse.de> + + * gcc.dg/tree-ssa/ldist-4.c: Remove undefined behavior. Adjust + expected outcome and comment why that happens. + 2013-09-11 Richard Biener <rguenther@suse.de> PR middle-end/58377 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c index a744fea..80626bd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-4.c @@ -10,20 +10,18 @@ int loop1 (int k) a[0] = k; for (i = 1; i < 100; i ++) { - for (j = 0; j < 100; j++) + for (j = 1; j < 100; j++) { a[j] = k * i; b[i][j] = a[j-1] + k; } } - return b[100-1][0]; + return b[100-1][1]; } -/* We used to distribute also innermost loops, but these could produce - too much code in the outer loop, degrading performance of scalar - code. So this test was XFAILed because the cost model of the stand - alone distribution pass has evolved. Now it passes. */ -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" { target ilp32 } } } */ -/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" { target lp64 } } } */ +/* The current cost model fuses the two partitions because they have + similar memory accesses. */ +/* { dg-final { scan-tree-dump "similar memory accesses" "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 0 "ldist" } } */ /* { dg-final { cleanup-tree-dump "ldist" } } */ diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 5f6a14b..43c3d91 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -218,6 +218,9 @@ static void dot_rdg_1 (FILE *file, struct graph *rdg) { int i; + pretty_printer buffer; + pp_needs_newline (&buffer) = false; + buffer.buffer->stream = file; fprintf (file, "digraph RDG {\n"); @@ -226,6 +229,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg) struct vertex *v = &(rdg->vertices[i]); struct graph_edge *e; + fprintf (file, "%d [label=\"[%d] ", i, i); + pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); + pp_flush (&buffer); + fprintf (file, "\"]\n"); + /* Highlight reads from memory. */ if (RDG_MEM_READS_STMT (rdg, i)) fprintf (file, "%d [style=filled, fillcolor=green]\n", i); @@ -268,16 +276,15 @@ dot_rdg_1 (FILE *file, struct graph *rdg) DEBUG_FUNCTION void dot_rdg (struct graph *rdg) { - /* When debugging, enable the following code. This cannot be used - in production compilers because it calls "system". */ -#if 0 - FILE *file = fopen ("/tmp/rdg.dot", "w"); - gcc_assert (file != NULL); - + /* When debugging, you may want to enable the following code. */ +#if 1 + FILE *file = popen("dot -Tx11", "w"); + if (!file) + return; dot_rdg_1 (file, rdg); - fclose (file); - - system ("dotty /tmp/rdg.dot &"); + fflush (file); + close (fileno (file)); + pclose (file); #else dot_rdg_1 (stderr, rdg); #endif @@ -645,17 +652,6 @@ partition_has_writes (partition_t partition) return partition->has_writes; } -/* If bit I is not set, it means that this node represents an - operation that has already been performed, and that should not be - performed again. This is the subgraph of remaining important - computations that is passed to the DFS algorithm for avoiding to - include several times the same stores in different loops. */ -static bitmap remaining_stmts; - -/* A node of the RDG is marked in this bitmap when it has as a - predecessor a node that writes to memory. */ -static bitmap upstream_mem_writes; - /* Returns true when DEF is an SSA_NAME defined in LOOP and used after the LOOP. */ @@ -1080,140 +1076,12 @@ rdg_cannot_recompute_vertex_p (struct graph *rdg, int v) static inline bool already_processed_vertex_p (bitmap processed, int v) { - return (bitmap_bit_p (processed, v) - || !bitmap_bit_p (remaining_stmts, v)); -} - -/* Returns NULL when there is no anti-dependence or output-dependence - among the successors of vertex V, otherwise returns the edge with the - dependency. */ - -static struct graph_edge * -has_anti_or_output_dependence (struct vertex *v) -{ - struct graph_edge *e; - - if (v->succ) - for (e = v->succ; e; e = e->succ_next) - if (RDGE_TYPE (e) == anti_dd - || RDGE_TYPE (e) == output_dd) - return e; - - return NULL; -} - -/* Returns true when V has an anti-dependence edge among its successors. */ - -static bool -predecessor_has_mem_write (struct graph *rdg, struct vertex *v) -{ - struct graph_edge *e; - - if (v->pred) - for (e = v->pred; e; e = e->pred_next) - if (bitmap_bit_p (upstream_mem_writes, e->src) - /* Don't consider flow channels: a write to memory followed - by a read from memory. These channels allow the split of - the RDG in different partitions. */ - && !RDG_MEM_WRITE_STMT (rdg, e->src)) - return true; - - return false; -} - -/* Initializes the upstream_mem_writes bitmap following the - information from RDG. */ - -static void -mark_nodes_having_upstream_mem_writes (struct graph *rdg) -{ - int v, x; - bitmap seen = BITMAP_ALLOC (NULL); - - for (v = rdg->n_vertices - 1; v >= 0; v--) - if (!bitmap_bit_p (seen, v)) - { - unsigned i; - vec<int> nodes; - nodes.create (3); - - graphds_dfs (rdg, &v, 1, &nodes, false, NULL); - - FOR_EACH_VEC_ELT (nodes, i, x) - { - if (!bitmap_set_bit (seen, x)) - continue; - - if (RDG_MEM_WRITE_STMT (rdg, x) - || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) - /* In anti dependences the read should occur before - the write, this is why both the read and the write - should be placed in the same partition. In output - dependences the writes order need to be preserved. */ - || has_anti_or_output_dependence (&(rdg->vertices[x]))) - bitmap_set_bit (upstream_mem_writes, x); - } - - nodes.release (); - } -} - -/* Returns true when vertex u has a memory write node as a predecessor - in RDG. */ - -static bool -has_upstream_mem_writes (int u) -{ - return bitmap_bit_p (upstream_mem_writes, u); + return bitmap_bit_p (processed, v); } static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t, bitmap); -/* Flag the uses of U stopping following the information from - upstream_mem_writes. */ - -static void -rdg_flag_uses (struct graph *rdg, int u, partition_t partition, - bitmap processed) -{ - struct vertex *x = &(rdg->vertices[u]); - gimple stmt = RDGV_STMT (x); - struct graph_edge *anti_dep = has_anti_or_output_dependence (x); - - /* Keep in the same partition the destination of an antidependence, - because this is a store to the exact same location. Putting this - in another partition is bad for cache locality. */ - if (anti_dep) - { - int v = anti_dep->dest; - - if (!already_processed_vertex_p (processed, v)) - rdg_flag_vertex_and_dependent (rdg, v, partition, processed); - } - - if (is_gimple_assign (stmt) && has_upstream_mem_writes (u)) - { - tree op0 = gimple_assign_lhs (stmt); - - /* Scalar channels don't have enough space for transmitting data - between tasks, unless we add more storage by privatizing. */ - if (is_gimple_reg (op0)) - { - use_operand_p use_p; - imm_use_iterator iter; - - FOR_EACH_IMM_USE_FAST (use_p, iter, op0) - { - int v = rdg_vertex_for_stmt (rdg, USE_STMT (use_p)); - - if (!already_processed_vertex_p (processed, v)) - rdg_flag_vertex_and_dependent (rdg, v, partition, processed); - } - } - } -} - /* Flag V from RDG as part of PARTITION, and also flag its loop number in LOOPS. */ @@ -1229,10 +1097,7 @@ rdg_flag_vertex (struct graph *rdg, int v, partition_t partition) bitmap_set_bit (partition->loops, loop->num); if (rdg_cannot_recompute_vertex_p (rdg, v)) - { - partition->has_writes = true; - bitmap_clear_bit (remaining_stmts, v); - } + partition->has_writes = true; } /* Flag in the bitmap PARTITION the vertex V and all its predecessors. @@ -1247,14 +1112,11 @@ rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition, nodes.create (3); int x; - bitmap_set_bit (processed, v); - rdg_flag_uses (rdg, v, partition, processed); - graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); - rdg_flag_vertex (rdg, v, partition); + graphds_dfs (rdg, &v, 1, &nodes, false, NULL); FOR_EACH_VEC_ELT (nodes, i, x) - if (!already_processed_vertex_p (processed, x)) - rdg_flag_vertex_and_dependent (rdg, x, partition, processed); + if (bitmap_set_bit (processed, x)) + rdg_flag_vertex (rdg, x, partition); nodes.release (); } @@ -1322,13 +1184,14 @@ rdg_flag_loop_exits (struct graph *rdg, partition_t partition, static partition_t build_rdg_partition_for_component (struct graph *rdg, rdgc c) { - int i, v; partition_t partition = partition_alloc (NULL, NULL); bitmap processed = BITMAP_ALLOC (NULL); - FOR_EACH_VEC_ELT (c->vertices, i, v) - if (!already_processed_vertex_p (processed, v)) - rdg_flag_vertex_and_dependent (rdg, v, partition, processed); + /* Flag the first vertex of the component and its dependent nodes. + Other members of the component are included in its dependencies. + ??? What do we need components for again? To early merge initial + vertices that are in a SCC of the RDG? */ + rdg_flag_vertex_and_dependent (rdg, c->vertices[0], partition, processed); rdg_flag_loop_exits (rdg, partition, processed); @@ -1777,13 +1640,8 @@ ldist_gen (struct loop *loop, struct graph *rdg, bitmap processed = BITMAP_ALLOC (NULL); bool any_builtin; - remaining_stmts = BITMAP_ALLOC (NULL); - upstream_mem_writes = BITMAP_ALLOC (NULL); - for (i = 0; i < rdg->n_vertices; i++) { - bitmap_set_bit (remaining_stmts, i); - /* Save in OTHER_STORES all the memory writes that are not in STARTING_VERTICES. */ if (RDG_MEM_WRITE_STMT (rdg, i)) @@ -1804,7 +1662,6 @@ ldist_gen (struct loop *loop, struct graph *rdg, } } - mark_nodes_having_upstream_mem_writes (rdg); rdg_build_components (rdg, starting_vertices, &components); rdg_build_partitions (rdg, components, &other_stores, &partitions, processed); @@ -1929,9 +1786,6 @@ ldist_gen (struct loop *loop, struct graph *rdg, ldist_done: - BITMAP_FREE (remaining_stmts); - BITMAP_FREE (upstream_mem_writes); - FOR_EACH_VEC_ELT (partitions, i, partition) partition_free (partition); |