diff options
author | Zdenek Dvorak <dvorakz@suse.cz> | 2007-02-06 22:49:49 +0100 |
---|---|---|
committer | Zdenek Dvorak <rakdver@gcc.gnu.org> | 2007-02-06 21:49:49 +0000 |
commit | 89f8f30f3565328a2805adafd22e05219b56d562 (patch) | |
tree | 0b2228e5e39d0218784f7e5f8304e2e94d0d96c2 | |
parent | ca20820ef1e80ac23138cbc3beb5ba895b59a1e3 (diff) | |
download | gcc-89f8f30f3565328a2805adafd22e05219b56d562.zip gcc-89f8f30f3565328a2805adafd22e05219b56d562.tar.gz gcc-89f8f30f3565328a2805adafd22e05219b56d562.tar.bz2 |
loop.texi: Document possibility not to perform disambiguation of loops with multiple latches.
* doc/loop.texi: Document possibility not to perform disambiguation
of loops with multiple latches.
* cfgloopmanip.c (alp_enum_p): Removed.
(add_loop): Handle subloops. Use get_loop_body_with_size.
(create_preheader): Do not allow ENTRY_BLOCK_PTR to be preheader.
* cfghooks.c (redirect_edge_and_branch_force): Set dominator for
the new forwarder block.
(make_forwarder_block): Only call new_bb_cbk if it is not NULL.
Handle the case latch is NULL.
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Avoid cfg modifications
when marking loop exits.
* ifcvt.c (if_convert): Ditto. Mark loop exits even if cfg cannot
be modified.
* loop-init.c (loop_optimizer_init): Do not modify cfg. Call
disambiguate_loops_with_multiple_latches.
* tree-cfgcleanup.c (cleanup_tree_cfg_loop): Calculate dominators
before fix_loop_structure.
* cfgloop.c: Include pointer-set.h and output.h.
(canonicalize_loop_headers, HEADER_BLOCK, LATCH_EDGE,
update_latch_info, mfb_keep_just, mfb_keep_nonlatch): Removed.
(get_loop_latch_edges, find_subloop_latch_edge_by_profile,
find_subloop_latch_edge_by_ivs, find_subloop_latch_edge,
mfb_redirect_edges_in_set, form_subloop, merge_latch_edges,
disambiguate_multiple_latches, get_loop_body_with_size,
disambiguate_loops_with_multiple_latches): New functions.
(flow_loop_dump): Dump multiple latch edges.
(flow_loop_nodes_find): Handle loops with multiple latches.
(flow_loops_find): Ditto. Do not call canonicalize_loop_headers.
(glb_enum_p): Modified.
(get_loop_body): Use get_loop_body_with_size.
* cfgloop.h (LOOPS_MAY_HAVE_MULTIPLE_LATCHES): New flag.
(AVOID_CFG_MODIFICATIONS): New constant.
(disambiguate_loops_with_multiple_latches, add_loop,
get_loop_body_with_size): Declare.
* Makefile.in (cfgloop.o): Add pointer-set.h and output.h.
* gcc.dg/tree-ssa/loop-25.c: New test.
From-SVN: r121670
-rw-r--r-- | gcc/ChangeLog | 38 | ||||
-rw-r--r-- | gcc/Makefile.in | 2 | ||||
-rw-r--r-- | gcc/cfghooks.c | 12 | ||||
-rw-r--r-- | gcc/cfgloop.c | 557 | ||||
-rw-r--r-- | gcc/cfgloop.h | 8 | ||||
-rw-r--r-- | gcc/cfgloopmanip.c | 64 | ||||
-rw-r--r-- | gcc/doc/loop.texi | 23 | ||||
-rw-r--r-- | gcc/ifcvt.c | 15 | ||||
-rw-r--r-- | gcc/loop-init.c | 24 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/loop-25.c | 128 | ||||
-rw-r--r-- | gcc/tree-cfgcleanup.c | 2 | ||||
-rw-r--r-- | gcc/tree-ssa-dom.c | 2 |
13 files changed, 615 insertions, 264 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ed39761..fac53a7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +2007-02-06 Zdenek Dvorak <dvorakz@suse.cz> + + * doc/loop.texi: Document possibility not to perform disambiguation + of loops with multiple latches. + * cfgloopmanip.c (alp_enum_p): Removed. + (add_loop): Handle subloops. Use get_loop_body_with_size. + (create_preheader): Do not allow ENTRY_BLOCK_PTR to be preheader. + * cfghooks.c (redirect_edge_and_branch_force): Set dominator for + the new forwarder block. + (make_forwarder_block): Only call new_bb_cbk if it is not NULL. + Handle the case latch is NULL. + * tree-ssa-dom.c (tree_ssa_dominator_optimize): Avoid cfg modifications + when marking loop exits. + * ifcvt.c (if_convert): Ditto. Mark loop exits even if cfg cannot + be modified. + * loop-init.c (loop_optimizer_init): Do not modify cfg. Call + disambiguate_loops_with_multiple_latches. + * tree-cfgcleanup.c (cleanup_tree_cfg_loop): Calculate dominators + before fix_loop_structure. + * cfgloop.c: Include pointer-set.h and output.h. + (canonicalize_loop_headers, HEADER_BLOCK, LATCH_EDGE, + update_latch_info, mfb_keep_just, mfb_keep_nonlatch): Removed. + (get_loop_latch_edges, find_subloop_latch_edge_by_profile, + find_subloop_latch_edge_by_ivs, find_subloop_latch_edge, + mfb_redirect_edges_in_set, form_subloop, merge_latch_edges, + disambiguate_multiple_latches, get_loop_body_with_size, + disambiguate_loops_with_multiple_latches): New functions. + (flow_loop_dump): Dump multiple latch edges. + (flow_loop_nodes_find): Handle loops with multiple latches. + (flow_loops_find): Ditto. Do not call canonicalize_loop_headers. + (glb_enum_p): Modified. + (get_loop_body): Use get_loop_body_with_size. + * cfgloop.h (LOOPS_HAVE_RECORDED_EXITS): New flag. + (AVOID_CFG_MODIFICATIONS): New constant. + (disambiguate_loops_with_multiple_latches, add_loop, + get_loop_body_with_size): Declare. 
+ * Makefile.in (cfgloop.o): Add pointer-set.h and output.h. + 2007-02-06 Seongbae Park <seongbae.park@gmail.com> PR inline-asm/28686 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 689122d..22111d5 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2585,7 +2585,7 @@ cfgcleanup.o : cfgcleanup.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(REGS_H) $(EMIT_RTL_H) $(CFGLAYOUT_H) tree-pass.h $(CFGLOOP_H) $(EXPR_H) cfgloop.o : cfgloop.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) coretypes.h $(TM_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(FLAGS_H) $(FUNCTION_H) \ - $(OBSTACK_H) toplev.h $(TREE_FLOW_H) $(TREE_H) + $(OBSTACK_H) toplev.h $(TREE_FLOW_H) $(TREE_H) pointer-set.h output.h cfgloopanal.o : cfgloopanal.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) \ $(BASIC_BLOCK_H) hard-reg-set.h $(CFGLOOP_H) $(EXPR_H) coretypes.h $(TM_H) \ $(OBSTACK_H) output.h diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c index d65cce9..1fa3aa7 100644 --- a/gcc/cfghooks.c +++ b/gcc/cfghooks.c @@ -378,6 +378,10 @@ redirect_edge_and_branch_force (edge e, basic_block dest) rescan_loop_exit (e, false, true); ret = cfg_hooks->redirect_edge_and_branch_force (e, dest); + if (ret != NULL + && dom_info_available_p (CDI_DOMINATORS)) + set_immediate_dominator (CDI_DOMINATORS, ret, src); + if (current_loops != NULL) { if (ret != NULL) @@ -724,7 +728,8 @@ make_forwarder_block (basic_block bb, bool (*redirect_edge_p) (edge), fallthru->count = 0; jump = redirect_edge_and_branch_force (e, bb); - if (jump) + if (jump != NULL + && new_bb_cbk != NULL) new_bb_cbk (jump); } @@ -742,9 +747,12 @@ make_forwarder_block (basic_block bb, bool (*redirect_edge_p) (edge), /* If we do not split a loop header, then both blocks belong to the same loop. In case we split loop header and do not redirect the latch edge to DUMMY, then DUMMY belongs to the outer loop, and - BB becomes the new header. */ + BB becomes the new header. 
If latch is not recorded for the loop, + we leave this updating on the caller (this may only happen during + loop analysis). */ loop = dummy->loop_father; if (loop->header == dummy + && loop->latch != NULL && find_edge (loop->latch, dummy) == NULL) { remove_bb_from_loops (dummy); diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index bd9e6d3..4465b11 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -32,18 +32,11 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "flags.h" #include "tree.h" #include "tree-flow.h" - -/* Ratio of frequencies of edges so that one of more latch edges is - considered to belong to inner loop with same header. */ -#define HEAVY_EDGE_RATIO 8 - -#define HEADER_BLOCK(B) (* (int *) (B)->aux) -#define LATCH_EDGE(E) (*(int *) (E)->aux) +#include "pointer-set.h" +#include "output.h" static void flow_loops_cfg_dump (FILE *); static void establish_preds (struct loop *); -static void canonicalize_loop_headers (void); -static bool glb_enum_p (basic_block, void *); /* Dump loop related CFG information. */ @@ -90,6 +83,24 @@ superloop_at_depth (struct loop *loop, unsigned depth) return loop->pred[depth]; } +/* Returns the list of the latch edges of LOOP. */ + +static VEC (edge, heap) * +get_loop_latch_edges (const struct loop *loop) +{ + edge_iterator ei; + edge e; + VEC (edge, heap) *ret = NULL; + + FOR_EACH_EDGE (e, ei, loop->header->preds) + { + if (dominated_by_p (CDI_DOMINATORS, e->src, loop->header)) + VEC_safe_push (edge, heap, ret, e); + } + + return ret; +} + /* Dump the loop information specified by LOOP to the stream FILE using auxiliary dump callback function LOOP_DUMP_AUX if non null. */ @@ -100,14 +111,27 @@ flow_loop_dump (const struct loop *loop, FILE *file, { basic_block *bbs; unsigned i; + VEC (edge, heap) *latches; + edge e; if (! loop || ! 
loop->header) return; fprintf (file, ";;\n;; Loop %d\n", loop->num); - fprintf (file, ";; header %d, latch %d\n", - loop->header->index, loop->latch->index); + fprintf (file, ";; header %d, ", loop->header->index); + if (loop->latch) + fprintf (file, "latch %d\n", loop->latch->index); + else + { + fprintf (file, "multiple latches:"); + latches = get_loop_latch_edges (loop); + for (i = 0; VEC_iterate (edge, latches, i, e); i++) + fprintf (file, " %d", e->src->index); + VEC_free (edge, heap, latches); + fprintf (file, "\n"); + } + fprintf (file, ";; depth %d, outer %ld\n", loop->depth, (long) (loop->outer ? loop->outer->num : -1)); @@ -198,46 +222,49 @@ flow_loops_free (struct loops *loops) int flow_loop_nodes_find (basic_block header, struct loop *loop) { - basic_block *stack; - int sp; + VEC (basic_block, heap) *stack = NULL; int num_nodes = 1; + edge latch; + edge_iterator latch_ei; header->loop_father = loop; header->loop_depth = loop->depth; - if (loop->latch->loop_father != loop) + FOR_EACH_EDGE (latch, latch_ei, loop->header->preds) { - stack = XNEWVEC (basic_block, n_basic_blocks); - sp = 0; + if (latch->src->loop_father == loop + || !dominated_by_p (CDI_DOMINATORS, latch->src, loop->header)) + continue; + num_nodes++; - stack[sp++] = loop->latch; - loop->latch->loop_father = loop; - loop->latch->loop_depth = loop->depth; + VEC_safe_push (basic_block, heap, stack, latch->src); + latch->src->loop_father = loop; + latch->src->loop_depth = loop->depth; - while (sp) + while (!VEC_empty (basic_block, stack)) { basic_block node; edge e; edge_iterator ei; - node = stack[--sp]; + node = VEC_pop (basic_block, stack); FOR_EACH_EDGE (e, ei, node->preds) { basic_block ancestor = e->src; - if (ancestor != ENTRY_BLOCK_PTR - && ancestor->loop_father != loop) + if (ancestor->loop_father != loop) { ancestor->loop_father = loop; ancestor->loop_depth = loop->depth; num_nodes++; - stack[sp++] = ancestor; + VEC_safe_push (basic_block, heap, stack, ancestor); } } } - free (stack); 
} + VEC_free (basic_block, heap, stack); + return num_nodes; } @@ -299,156 +326,6 @@ flow_loop_tree_node_remove (struct loop *loop) loop->pred = NULL; } -/* A callback to update latch and header info for basic block JUMP created - by redirecting an edge. */ - -static void -update_latch_info (basic_block jump) -{ - alloc_aux_for_block (jump, sizeof (int)); - HEADER_BLOCK (jump) = 0; - alloc_aux_for_edge (single_pred_edge (jump), sizeof (int)); - LATCH_EDGE (single_pred_edge (jump)) = 0; - set_immediate_dominator (CDI_DOMINATORS, jump, single_pred (jump)); -} - -/* A callback for make_forwarder block, to redirect all edges except for - MFB_KJ_EDGE to the entry part. E is the edge for that we should decide - whether to redirect it. */ - -static edge mfb_kj_edge; -static bool -mfb_keep_just (edge e) -{ - return e != mfb_kj_edge; -} - -/* A callback for make_forwarder block, to redirect the latch edges into an - entry part. E is the edge for that we should decide whether to redirect - it. */ - -static bool -mfb_keep_nonlatch (edge e) -{ - return LATCH_EDGE (e); -} - -/* Takes care of merging natural loops with shared headers. */ - -static void -canonicalize_loop_headers (void) -{ - basic_block header; - edge e; - - alloc_aux_for_blocks (sizeof (int)); - alloc_aux_for_edges (sizeof (int)); - - /* Split blocks so that each loop has only single latch. */ - FOR_EACH_BB (header) - { - edge_iterator ei; - int num_latches = 0; - int have_abnormal_edge = 0; - - FOR_EACH_EDGE (e, ei, header->preds) - { - basic_block latch = e->src; - - if (e->flags & EDGE_ABNORMAL) - have_abnormal_edge = 1; - - if (latch != ENTRY_BLOCK_PTR - && dominated_by_p (CDI_DOMINATORS, latch, header)) - { - num_latches++; - LATCH_EDGE (e) = 1; - } - } - if (have_abnormal_edge) - HEADER_BLOCK (header) = 0; - else - HEADER_BLOCK (header) = num_latches; - } - - if (HEADER_BLOCK (single_succ (ENTRY_BLOCK_PTR))) - { - basic_block bb; - - /* We could not redirect edges freely here. 
On the other hand, - we can simply split the edge from entry block. */ - bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR)); - - alloc_aux_for_edge (single_succ_edge (bb), sizeof (int)); - LATCH_EDGE (single_succ_edge (bb)) = 0; - alloc_aux_for_block (bb, sizeof (int)); - HEADER_BLOCK (bb) = 0; - } - - FOR_EACH_BB (header) - { - int max_freq, is_heavy; - edge heavy, tmp_edge; - edge_iterator ei; - - if (HEADER_BLOCK (header) <= 1) - continue; - - /* Find a heavy edge. */ - is_heavy = 1; - heavy = NULL; - max_freq = 0; - FOR_EACH_EDGE (e, ei, header->preds) - if (LATCH_EDGE (e) && - EDGE_FREQUENCY (e) > max_freq) - max_freq = EDGE_FREQUENCY (e); - FOR_EACH_EDGE (e, ei, header->preds) - if (LATCH_EDGE (e) && - EDGE_FREQUENCY (e) >= max_freq / HEAVY_EDGE_RATIO) - { - if (heavy) - { - is_heavy = 0; - break; - } - else - heavy = e; - } - - if (is_heavy) - { - /* Split out the heavy edge, and create inner loop for it. */ - mfb_kj_edge = heavy; - tmp_edge = make_forwarder_block (header, mfb_keep_just, - update_latch_info); - alloc_aux_for_block (tmp_edge->dest, sizeof (int)); - HEADER_BLOCK (tmp_edge->dest) = 1; - alloc_aux_for_edge (tmp_edge, sizeof (int)); - LATCH_EDGE (tmp_edge) = 0; - HEADER_BLOCK (header)--; - } - - if (HEADER_BLOCK (header) > 1) - { - /* Create a new latch block. */ - tmp_edge = make_forwarder_block (header, mfb_keep_nonlatch, - update_latch_info); - alloc_aux_for_block (tmp_edge->dest, sizeof (int)); - HEADER_BLOCK (tmp_edge->src) = 0; - HEADER_BLOCK (tmp_edge->dest) = 1; - alloc_aux_for_edge (tmp_edge, sizeof (int)); - LATCH_EDGE (tmp_edge) = 1; - } - } - - free_aux_for_blocks (); - free_aux_for_edges (); - -#ifdef ENABLE_CHECKING - verify_dominators (CDI_DOMINATORS); -#endif -} - /* Allocates and returns new loop structure. */ struct loop * @@ -494,9 +371,6 @@ flow_loops_find (struct loops *loops) /* Ensure that the dominators are computed. */ calculate_dominance_info (CDI_DOMINATORS); - /* Join loops with shared headers. 
*/ - canonicalize_loop_headers (); - /* Count the number of loop headers. This should be the same as the number of natural loops. */ headers = sbitmap_alloc (last_basic_block); @@ -506,7 +380,6 @@ flow_loops_find (struct loops *loops) FOR_EACH_BB (header) { edge_iterator ei; - int more_latches = 0; header->loop_depth = 0; @@ -533,8 +406,6 @@ flow_loops_find (struct loops *loops) && dominated_by_p (CDI_DOMINATORS, latch, header)) { /* Shared headers should be eliminated by now. */ - gcc_assert (!more_latches); - more_latches = 1; SET_BIT (headers, header->index); num_loops++; } @@ -589,21 +460,26 @@ flow_loops_find (struct loops *loops) loop->num = num_loops; num_loops++; - /* Look for the latch for this header block. */ + flow_loop_tree_node_add (header->loop_father, loop); + loop->num_nodes = flow_loop_nodes_find (loop->header, loop); + + /* Look for the latch for this header block, if it has just a + single one. */ FOR_EACH_EDGE (e, ei, header->preds) { basic_block latch = e->src; - if (latch != ENTRY_BLOCK_PTR - && dominated_by_p (CDI_DOMINATORS, latch, header)) + if (flow_bb_inside_loop_p (loop, latch)) { + if (loop->latch != NULL) + { + /* More than one latch edge. */ + loop->latch = NULL; + break; + } loop->latch = latch; - break; } } - - flow_loop_tree_node_add (header->loop_father, loop); - loop->num_nodes = flow_loop_nodes_find (loop->header, loop); } free (dfs_order); @@ -617,6 +493,264 @@ flow_loops_find (struct loops *loops) return VEC_length (loop_p, loops->larray); } +/* Ratio of frequencies of edges so that one of more latch edges is + considered to belong to inner loop with same header. */ +#define HEAVY_EDGE_RATIO 8 + +/* Minimum number of samples for that we apply + find_subloop_latch_edge_by_profile heuristics. */ +#define HEAVY_EDGE_MIN_SAMPLES 10 + +/* If the profile info is available, finds an edge in LATCHES that much more + frequent than the remaining edges. Returns such an edge, or NULL if we do + not find one. 
+ + We do not use guessed profile here, only the measured one. The guessed + profile is usually too flat and unreliable for this (and it is mostly based + on the loop structure of the program, so it does not make much sense to + derive the loop structure from it). */ + +static edge +find_subloop_latch_edge_by_profile (VEC (edge, heap) *latches) +{ + unsigned i; + edge e, me = NULL; + gcov_type mcount = 0, tcount = 0; + + for (i = 0; VEC_iterate (edge, latches, i, e); i++) + { + if (e->count > mcount) + { + me = e; + mcount = e->count; + } + tcount += e->count; + } + + if (tcount < HEAVY_EDGE_MIN_SAMPLES + || (tcount - mcount) * HEAVY_EDGE_RATIO > tcount) + return NULL; + + if (dump_file) + fprintf (dump_file, + "Found latch edge %d -> %d using profile information.\n", + me->src->index, me->dest->index); + return me; +} + +/* Among LATCHES, guesses a latch edge of LOOP corresponding to subloop, based + on the structure of induction variables. Returns this edge, or NULL if we + do not find any. + + We are quite conservative, and look just for an obvious simple innermost + loop (which is the case where we would lose the most performance by not + disambiguating the loop). More precisely, we look for the following + situation: The source of the chosen latch edge dominates sources of all + the other latch edges. Additionally, the header does not contain a phi node + such that the argument from the chosen edge is equal to the argument from + another edge. */ + +static edge +find_subloop_latch_edge_by_ivs (struct loop *loop, VEC (edge, heap) *latches) +{ + edge e, latch = VEC_index (edge, latches, 0); + unsigned i; + tree phi, lop; + basic_block bb; + + /* Find the candidate for the latch edge. */ + for (i = 1; VEC_iterate (edge, latches, i, e); i++) + if (dominated_by_p (CDI_DOMINATORS, latch->src, e->src)) + latch = e; + + /* Verify that it dominates all the latch edges. 
*/ + for (i = 0; VEC_iterate (edge, latches, i, e); i++) + if (!dominated_by_p (CDI_DOMINATORS, e->src, latch->src)) + return NULL; + + /* Check for a phi node that would deny that this is a latch edge of + a subloop. */ + for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) + { + lop = PHI_ARG_DEF_FROM_EDGE (phi, latch); + + /* Ignore the values that are not changed inside the subloop. */ + if (TREE_CODE (lop) != SSA_NAME + || SSA_NAME_DEF_STMT (lop) == phi) + continue; + bb = bb_for_stmt (SSA_NAME_DEF_STMT (lop)); + if (!bb || !flow_bb_inside_loop_p (loop, bb)) + continue; + + for (i = 0; VEC_iterate (edge, latches, i, e); i++) + if (e != latch + && PHI_ARG_DEF_FROM_EDGE (phi, e) == lop) + return NULL; + } + + if (dump_file) + fprintf (dump_file, + "Found latch edge %d -> %d using iv structure.\n", + latch->src->index, latch->dest->index); + return latch; +} + +/* If we can determine that one of the several latch edges of LOOP behaves + as a latch edge of a separate subloop, returns this edge. Otherwise + returns NULL. */ + +static edge +find_subloop_latch_edge (struct loop *loop) +{ + VEC (edge, heap) *latches = get_loop_latch_edges (loop); + edge latch = NULL; + + if (VEC_length (edge, latches) > 1) + { + latch = find_subloop_latch_edge_by_profile (latches); + + if (!latch + /* We consider ivs to guess the latch edge only in SSA. Perhaps we + should use cfghook for this, but it is hard to imagine it would + be useful elsewhere. */ + && current_ir_type () == IR_GIMPLE) + latch = find_subloop_latch_edge_by_ivs (loop, latches); + } + + VEC_free (edge, heap, latches); + return latch; +} + +/* Callback for make_forwarder_block. Returns true if the edge E is marked + in the set MFB_REIS_SET. */ + +static struct pointer_set_t *mfb_reis_set; +static bool +mfb_redirect_edges_in_set (edge e) +{ + return pointer_set_contains (mfb_reis_set, e); +} + +/* Creates a subloop of LOOP with latch edge LATCH. 
*/ + +static void +form_subloop (struct loop *loop, edge latch) +{ + edge_iterator ei; + edge e, new_entry; + struct loop *new_loop; + + mfb_reis_set = pointer_set_create (); + FOR_EACH_EDGE (e, ei, loop->header->preds) + { + if (e != latch) + pointer_set_insert (mfb_reis_set, e); + } + new_entry = make_forwarder_block (loop->header, mfb_redirect_edges_in_set, + NULL); + pointer_set_destroy (mfb_reis_set); + + loop->header = new_entry->src; + + /* Find the blocks and subloops that belong to the new loop, and add it to + the appropriate place in the loop tree. */ + new_loop = alloc_loop (); + new_loop->header = new_entry->dest; + new_loop->latch = latch->src; + add_loop (new_loop, loop); +} + +/* Make all the latch edges of LOOP to go to a single forwarder block -- + a new latch of LOOP. */ + +static void +merge_latch_edges (struct loop *loop) +{ + VEC (edge, heap) *latches = get_loop_latch_edges (loop); + edge latch, e; + unsigned i; + + gcc_assert (VEC_length (edge, latches) > 0); + + if (VEC_length (edge, latches) == 1) + loop->latch = VEC_index (edge, latches, 0)->src; + else + { + if (dump_file) + fprintf (dump_file, "Merged latch edges of loop %d\n", loop->num); + + mfb_reis_set = pointer_set_create (); + for (i = 0; VEC_iterate (edge, latches, i, e); i++) + pointer_set_insert (mfb_reis_set, e); + latch = make_forwarder_block (loop->header, mfb_redirect_edges_in_set, + NULL); + pointer_set_destroy (mfb_reis_set); + + loop->header = latch->dest; + loop->latch = latch->src; + } + + VEC_free (edge, heap, latches); +} + +/* LOOP may have several latch edges. Transform it into (possibly several) + loops with single latch edge. */ + +static void +disambiguate_multiple_latches (struct loop *loop) +{ + edge e; + + /* We eliminate the mutiple latches by splitting the header to the forwarder + block F and the rest R, and redirecting the edges. 
There are two cases: + + 1) If there is a latch edge E that corresponds to a subloop (we guess + that based on profile -- if it is taken much more often than the + remaining edges; and on trees, using the information about induction + variables of the loops), we redirect E to R, all the remaining edges to + F, then rescan the loops and try again for the outer loop. + 2) If there is no such edge, we redirect all latch edges to F, and the + entry edges to R, thus making F the single latch of the loop. */ + + if (dump_file) + fprintf (dump_file, "Disambiguating loop %d with multiple latches\n", + loop->num); + + /* During latch merging, we may need to redirect the entry edges to a new + block. This would cause problems if the entry edge was the one from the + entry block. To avoid having to handle this case specially, split + such entry edge. */ + e = find_edge (ENTRY_BLOCK_PTR, loop->header); + if (e) + split_edge (e); + + while (1) + { + e = find_subloop_latch_edge (loop); + if (!e) + break; + + form_subloop (loop, e); + } + + merge_latch_edges (loop); +} + +/* Split loops with multiple latch edges. */ + +void +disambiguate_loops_with_multiple_latches (void) +{ + loop_iterator li; + struct loop *loop; + + FOR_EACH_LOOP (li, loop, 0) + { + if (!loop->latch) + disambiguate_multiple_latches (loop); + } +} + /* Return nonzero if basic block BB belongs to LOOP. */ bool flow_bb_inside_loop_p (const struct loop *loop, const basic_block bb) @@ -630,44 +764,59 @@ flow_bb_inside_loop_p (const struct loop *loop, const basic_block bb) return loop == source_loop || flow_loop_nested_p (loop, source_loop); } -/* Enumeration predicate for get_loop_body. */ +/* Enumeration predicate for get_loop_body_with_size. */ static bool -glb_enum_p (basic_block bb, void *glb_header) +glb_enum_p (basic_block bb, void *glb_loop) +{ + struct loop *loop = glb_loop; + return (bb != loop->header + && dominated_by_p (CDI_DOMINATORS, bb, loop->header)); +} + +/* Gets basic blocks of a LOOP. 
Header is the 0-th block, rest is in dfs + order against direction of edges from latch. Specially, if + header != latch, latch is the 1-st block. LOOP cannot be the fake + loop tree root, and its size must be at most MAX_SIZE. The blocks + in the LOOP body are stored to BODY, and the size of the LOOP is + returned. */ + +unsigned +get_loop_body_with_size (const struct loop *loop, basic_block *body, + unsigned max_size) { - return bb != (basic_block) glb_header; + return dfs_enumerate_from (loop->header, 1, glb_enum_p, + body, max_size, (void *) loop); } /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs order against direction of edges from latch. Specially, if header != latch, latch is the 1-st block. */ + basic_block * get_loop_body (const struct loop *loop) { - basic_block *tovisit, bb; + basic_block *body, bb; unsigned tv = 0; gcc_assert (loop->num_nodes); - tovisit = XCNEWVEC (basic_block, loop->num_nodes); - tovisit[tv++] = loop->header; + body = XCNEWVEC (basic_block, loop->num_nodes); if (loop->latch == EXIT_BLOCK_PTR) { - /* There may be blocks unreachable from EXIT_BLOCK. */ + /* There may be blocks unreachable from EXIT_BLOCK, hence we need to + special-case the fake loop that contains the whole function. 
*/ gcc_assert (loop->num_nodes == (unsigned) n_basic_blocks); + body[tv++] = loop->header; + body[tv++] = EXIT_BLOCK_PTR; FOR_EACH_BB (bb) - tovisit[tv++] = bb; - tovisit[tv++] = EXIT_BLOCK_PTR; - } - else if (loop->latch != loop->header) - { - tv = dfs_enumerate_from (loop->latch, 1, glb_enum_p, - tovisit + 1, loop->num_nodes - 1, - loop->header) + 1; + body[tv++] = bb; } + else + tv = get_loop_body_with_size (loop, body, loop->num_nodes); gcc_assert (tv == loop->num_nodes); - return tovisit; + return body; } /* Fills dominance descendants inside LOOP of the basic block BB into diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 4223a39..09eef08 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -168,11 +168,13 @@ enum LOOPS_HAVE_PREHEADERS = 1, LOOPS_HAVE_SIMPLE_LATCHES = 2, LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS = 4, - LOOPS_HAVE_RECORDED_EXITS = 8 + LOOPS_HAVE_RECORDED_EXITS = 8, + LOOPS_MAY_HAVE_MULTIPLE_LATCHES = 16 }; #define LOOPS_NORMAL (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES \ | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS) +#define AVOID_CFG_MODIFICATIONS (LOOPS_MAY_HAVE_MULTIPLE_LATCHES) typedef struct loop *loop_p; DEF_VEC_P (loop_p); @@ -198,6 +200,7 @@ struct loops /* Loop recognition. */ extern int flow_loops_find (struct loops *); +extern void disambiguate_loops_with_multiple_latches (void); extern void flow_loops_free (struct loops *); extern void flow_loops_dump (FILE *, void (*)(const struct loop *, FILE *, int), int); @@ -215,6 +218,7 @@ void rescan_loop_exit (edge, bool, bool); /* Loop data structure manipulation/querying. 
*/ extern void flow_loop_tree_node_add (struct loop *, struct loop *); extern void flow_loop_tree_node_remove (struct loop *); +extern void add_loop (struct loop *, struct loop *); extern bool flow_loop_nested_p (const struct loop *, const struct loop *); extern bool flow_bb_inside_loop_p (const struct loop *, const basic_block); extern struct loop * find_common_loop (struct loop *, struct loop *); @@ -229,6 +233,8 @@ extern void mark_loop_exit_edges (void); /* Loops & cfg manipulation. */ extern basic_block *get_loop_body (const struct loop *); +extern unsigned get_loop_body_with_size (const struct loop *, basic_block *, + unsigned); extern basic_block *get_loop_body_in_dom_order (const struct loop *); extern basic_block *get_loop_body_in_bfs_order (const struct loop *); extern VEC (edge, heap) *get_loop_exit_edges (const struct loop *); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 8e5f4c2..9ca3947 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -38,11 +38,9 @@ static void loop_redirect_edge (edge, basic_block); static void remove_bbs (basic_block *, int); static bool rpe_enum_p (basic_block, void *); static int find_path (edge, basic_block **); -static bool alp_enum_p (basic_block, void *); static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *); -static void place_new_loop (struct loop *); static basic_block create_preheader (struct loop *, int); static void unloop (struct loop *, bool *); @@ -395,39 +393,54 @@ remove_path (edge e) return true; } -/* Predicate for enumeration in add_loop. */ -static bool -alp_enum_p (basic_block bb, void *alp_header) +/* Creates place for a new LOOP in loops structure. 
*/ + +static void +place_new_loop (struct loop *loop) { - return bb != (basic_block) alp_header; + loop->num = number_of_loops (); + VEC_safe_push (loop_p, heap, current_loops->larray, loop); } /* Given LOOP structure with filled header and latch, find the body of the corresponding loop and add it to loops tree. Insert the LOOP as a son of outer. */ -static void +void add_loop (struct loop *loop, struct loop *outer) { basic_block *bbs; int i, n; + struct loop *subloop; /* Add it to loop structure. */ place_new_loop (loop); flow_loop_tree_node_add (outer, loop); /* Find its nodes. */ - bbs = XCNEWVEC (basic_block, n_basic_blocks); - n = dfs_enumerate_from (loop->latch, 1, alp_enum_p, - bbs, n_basic_blocks, loop->header); + bbs = XNEWVEC (basic_block, n_basic_blocks); + n = get_loop_body_with_size (loop, bbs, n_basic_blocks); for (i = 0; i < n; i++) { - remove_bb_from_loops (bbs[i]); - add_bb_to_loop (bbs[i], loop); + if (bbs[i]->loop_father == outer) + { + remove_bb_from_loops (bbs[i]); + add_bb_to_loop (bbs[i], loop); + continue; + } + + loop->num_nodes++; + + /* If we find a direct subloop of OUTER, move it to LOOP. */ + subloop = bbs[i]->loop_father; + if (subloop->outer == outer + && subloop->header == bbs[i]) + { + flow_loop_tree_node_remove (subloop); + flow_loop_tree_node_add (loop, subloop); + } } - remove_bb_from_loops (loop->header); - add_bb_to_loop (loop->header, loop); free (bbs); } @@ -631,14 +644,6 @@ fix_loop_placements (struct loop *loop, bool *irred_invalidated) } } -/* Creates place for a new LOOP in loops structure. */ -static void -place_new_loop (struct loop *loop) -{ - loop->num = number_of_loops (); - VEC_safe_push (loop_p, heap, current_loops->larray, loop); -} - /* Copies copy of LOOP as subloop of TARGET loop, placing newly created loop into loops structure. 
*/ struct loop * @@ -1115,12 +1120,15 @@ create_preheader (struct loop *loop, int flags) gcc_assert (nentry); if (nentry == 1) { - /* Get an edge that is different from the one from loop->latch - to loop->header. */ - e = EDGE_PRED (loop->header, - EDGE_PRED (loop->header, 0)->src == loop->latch); - - if (!(flags & CP_SIMPLE_PREHEADERS) || single_succ_p (e->src)) + e = loop_preheader_edge (loop); + + if (/* We do not allow entry block to be the loop preheader, since we + cannot emit code there. */ + e->src != ENTRY_BLOCK_PTR + /* If we want simple preheaders, also force the preheader to have + just a single successor. */ + && !((flags & CP_SIMPLE_PREHEADERS) + && !single_succ_p (e->src))) return NULL; } diff --git a/gcc/doc/loop.texi b/gcc/doc/loop.texi index e486b0c..3f0076e 100644 --- a/gcc/doc/loop.texi +++ b/gcc/doc/loop.texi @@ -48,11 +48,17 @@ a single header, or if there is a branching in the middle of the loop. The representation of loops in GCC however allows only loops with a single latch. During loop analysis, headers of such loops are split and forwarder blocks are created in order to disambiguate their structures. -A heuristic based on profile information is used to determine whether -the latches correspond to sub-loops or to control flow in a single loop. -This means that the analysis sometimes changes the CFG, and if you run -it in the middle of an optimization pass, you must be able to deal with -the new blocks. +Heuristic based on profile information and structure of the induction +variables in the loops is used to determine whether the latches +correspond to sub-loops or to control flow in a single loop. This means +that the analysis sometimes changes the CFG, and if you run it in the +middle of an optimization pass, you must be able to deal with the new +blocks. 
You may avoid CFG changes by passing the +@code{LOOPS_MAY_HAVE_MULTIPLE_LATCHES} flag to the loop discovery; +note, however, that most other loop manipulation functions will not work +correctly for loops with multiple latch edges (the functions that only +query membership of blocks in loops and subloop relationships, or +enumerate and test loop exits, can be expected to work). Body of the loop is the set of blocks that are dominated by its header, and reachable from its latch against the direction of edges in CFG. The @@ -96,6 +102,13 @@ bitmask. These flags specify what other properties of the loop structures should be calculated/enforced and preserved later: @itemize +@item @code{LOOPS_MAY_HAVE_MULTIPLE_LATCHES}: If this flag is set, no +changes to CFG will be performed in the loop analysis; in particular, +loops with multiple latch edges will not be disambiguated. If a loop +has multiple latches, its latch block is set to NULL. Most of +the loop manipulation functions will not work for loops in this shape. +No other flags that require CFG changes can be passed to +loop_optimizer_init. @item @code{LOOPS_HAVE_PREHEADERS}: Forwarder blocks are created in such a way that each loop has only one entry edge, and additionally, the source block of this entry edge has only one successor. This creates a diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index bbd349d..9df57bb 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -3908,18 +3908,13 @@ if_convert (int x_life_data_ok) num_true_changes = 0; life_data_ok = (x_life_data_ok != 0); - if ((! 
targetm.cannot_modify_jumps_p ()) - && (!flag_reorder_blocks_and_partition || !no_new_pseudos - || !targetm.have_named_sections)) + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + if (current_loops) { - loop_optimizer_init (0); - if (current_loops) - { - mark_loop_exit_edges (); - loop_optimizer_finalize (); - } - free_dominance_info (CDI_DOMINATORS); + mark_loop_exit_edges (); + loop_optimizer_finalize (); } + free_dominance_info (CDI_DOMINATORS); /* Compute postdominators if we think we'll use them. */ if (HAVE_conditional_execution || life_data_ok) diff --git a/gcc/loop-init.c b/gcc/loop-init.c index 2c7f37a..3e6d342 100644 --- a/gcc/loop-init.c +++ b/gcc/loop-init.c @@ -40,22 +40,11 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA void loop_optimizer_init (unsigned flags) { - edge e; - edge_iterator ei; struct loops *loops; gcc_assert (!current_loops); loops = XCNEW (struct loops); - /* Avoid annoying special cases of edges going to exit - block. */ - - for (ei = ei_start (EXIT_BLOCK_PTR->preds); (e = ei_safe_edge (ei)); ) - if ((e->flags & EDGE_FALLTHRU) && !single_succ_p (e->src)) - split_edge (e); - else - ei_next (&ei); - /* Find the loops. */ flow_loops_find (loops); @@ -69,6 +58,19 @@ loop_optimizer_init (unsigned flags) return; } + if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES) + { + /* If the loops may have multiple latches, we cannot canonicalize + them further (and most of the loop manipulation functions will + not work). However, we avoid modifying cfg, which some + passes may want. */ + gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES + | LOOPS_HAVE_RECORDED_EXITS)) == 0); + current_loops->state = LOOPS_MAY_HAVE_MULTIPLE_LATCHES; + } + else + disambiguate_loops_with_multiple_latches (); + /* Create pre-headers. 
*/ if (flags & LOOPS_HAVE_PREHEADERS) create_preheaders (CP_SIMPLE_PREHEADERS); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 47b918e..d976bd0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2007-02-06 Zdenek Dvorak <dvorakz@suse.cz> + + * gcc.dg/tree-ssa/loop-25.c: New test. + 2007-02-06 Richard Henderson <rth@redhat.com> * gcc.target/i386/cvt-1.c: Don't expect xmm registers. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-25.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-25.c new file mode 100644 index 0000000..a15a388 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-25.c @@ -0,0 +1,128 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-vrp1" } */ + +int foo(void); +void bla(void); +void bar(void); + +void test1 (void) +{ + unsigned i; + + /* Only one loop should be found here. */ + i = 0; + while (1) + { + i++; + if (i == 100) + break; + + if (foo ()) + bla (); + else + bar (); + } +} + +void test2 (void) +{ + unsigned i, j; + + /* Two loops should be found, in this case. */ + i = j = 0; + while (1) + { + j++; + foo (); + if (j < 100) + continue; + + i++; + j = 0; + if (i == 100) + break; + } +} + +void test3 (void) +{ + unsigned i, j, k; + + /* Three loops. */ + i = j = k = 0; + while (1) + { + j++; + foo (); + if (j < 100) + continue; + + j = 0; + k++; + if (k < 100) + continue; + + k = 0; + i++; + if (i == 100) + break; + } +} + +void test4 (void) +{ + unsigned i, j, k; + + /* Two loops with a nested subloop. */ + i = j = 0; + while (1) + { + j++; + foo (); + for (k = 0; k < 100; k++) + foo (); + + if (j < 100) + continue; + + i++; + j = 0; + if (i == 100) + break; + } +} + + +void test5 (void) +{ + unsigned i, j; + + /* Both subloop and non-subloop back edges. 
*/ + i = j = 0; + while (1) + { + j++; + foo (); + if (j < 100) + continue; + j = 0; + + i++; + if (i == 100) + break; + + if (foo ()) + bla (); + else + bar (); + } +} + +/* { dg-final { scan-tree-dump-times "Disambiguating loop" 5 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "Found latch edge" 5 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "Merged latch edges" 2 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "4 loops found" 2 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "3 loops found" 2 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "2 loops found" 1 "vrp1" } } */ + +/* { dg-final { cleanup-tree-dump "vrp1" } } */ diff --git a/gcc/tree-cfgcleanup.c b/gcc/tree-cfgcleanup.c index 547b67a..f78be8e 100644 --- a/gcc/tree-cfgcleanup.c +++ b/gcc/tree-cfgcleanup.c @@ -582,8 +582,8 @@ cleanup_tree_cfg_loop (void) if (changed) { bitmap changed_bbs = BITMAP_ALLOC (NULL); - fix_loop_structure (changed_bbs); calculate_dominance_info (CDI_DOMINATORS); + fix_loop_structure (changed_bbs); /* This usually does nothing. But sometimes parts of cfg that originally were inside a loop get out of it due to edge removal (since they diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index d890688..8ebd139 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -279,7 +279,7 @@ tree_ssa_dominator_optimize (void) /* We need to know which edges exit loops so that we can aggressively thread through loop headers to an exit edge. */ - loop_optimizer_init (0); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); if (current_loops) { mark_loop_exit_edges (); |