| author | Zdenek Dvorak <dvorakz@suse.cz> | 2007-02-06 22:49:49 +0100 |
|---|---|---|
| committer | Zdenek Dvorak <rakdver@gcc.gnu.org> | 2007-02-06 21:49:49 +0000 |
| commit | 89f8f30f3565328a2805adafd22e05219b56d562 (patch) | |
| tree | 0b2228e5e39d0218784f7e5f8304e2e94d0d96c2 /gcc/cfgloop.c | |
| parent | ca20820ef1e80ac23138cbc3beb5ba895b59a1e3 (diff) | |
loop.texi: Document possibility not to perform disambiguation of loops with multiple latches.
* doc/loop.texi: Document possibility not to perform disambiguation
of loops with multiple latches.
* cfgloopmanip.c (alp_enum_p): Removed.
(add_loop): Handle subloops. Use get_loop_body_with_size.
(create_preheader): Do not allow ENTRY_BLOCK_PTR to be preheader.
* cfghooks.c (redirect_edge_and_branch_force): Set dominator for
the new forwarder block.
(make_forwarder_block): Only call new_bb_cbk if it is not NULL.
Handle the case when latch is NULL.
* tree-ssa-dom.c (tree_ssa_dominator_optimize): Avoid cfg modifications
when marking loop exits.
* ifcvt.c (if_convert): Ditto. Mark loop exits even if cfg cannot
be modified.
* loop-init.c (loop_optimizer_init): Do not modify cfg. Call
disambiguate_loops_with_multiple_latches.
* tree-cfgcleanup.c (cleanup_tree_cfg_loop): Calculate dominators
before fix_loop_structure.
* cfgloop.c: Include pointer-set.h and output.h.
(canonicalize_loop_headers, HEADER_BLOCK, LATCH_EDGE,
update_latch_info, mfb_keep_just, mfb_keep_nonlatch): Removed.
(get_loop_latch_edges, find_subloop_latch_edge_by_profile,
find_subloop_latch_edge_by_ivs, find_subloop_latch_edge,
mfb_redirect_edges_in_set, form_subloop, merge_latch_edges,
disambiguate_multiple_latches, get_loop_body_with_size,
disambiguate_loops_with_multiple_latches): New functions.
(flow_loop_dump): Dump multiple latch edges.
(flow_loop_nodes_find): Handle loops with multiple latches.
(flow_loops_find): Ditto. Do not call canonicalize_loop_headers.
(glb_enum_p): Modified.
(get_loop_body): Use get_loop_body_with_size.
* cfgloop.h (LOOPS_HAVE_RECORDED_EXITS): New flag.
(AVOID_CFG_MODIFICATIONS): New constant.
(disambiguate_loops_with_multiple_latches, add_loop,
get_loop_body_with_size): Declare.
* Makefile.in (cfgloop.o): Add pointer-set.h and output.h.
* gcc.dg/tree-ssa/loop-25.c: New test.
From-SVN: r121670
Diffstat (limited to 'gcc/cfgloop.c')
-rw-r--r--  gcc/cfgloop.c | 557
1 file changed, 353 insertions(+), 204 deletions(-)
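To make the problem concrete before the patch itself: the change is about loops whose header has more than one back edge. The following C function is a minimal, hypothetical illustration of such a CFG (it is not the new gcc.dg/tree-ssa/loop-25.c test added by this commit); both `goto again` statements create a latch edge into the same header.

```c
/* Hypothetical example of a loop with multiple latch edges; not the
   actual gcc.dg/tree-ssa/loop-25.c test from this commit.  Both
   "goto again" statements jump back to the same loop header, so the
   header has two back edges (two latch edges).  */
void
f (int n, int *a)
{
  int i = 0;

 again:
  if (i >= n)
    return;

  if (a[i] & 1)
    {
      i += 1;
      goto again;   /* first latch edge */
    }

  i += 2;
  goto again;       /* second latch edge */
}
```

Previously flow_loops_find split such headers up front via canonicalize_loop_headers; with this patch the loop is recorded as-is (loop->latch == NULL), and disambiguate_loops_with_multiple_latches is invoked separately (from loop_optimizer_init, per the ChangeLog above) when single latches are required.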
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index bd9e6d3..4465b11 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -32,18 +32,11 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 #include "flags.h"
 #include "tree.h"
 #include "tree-flow.h"
-
-/* Ratio of frequencies of edges so that one of more latch edges is
-   considered to belong to inner loop with same header.  */
-#define HEAVY_EDGE_RATIO 8
-
-#define HEADER_BLOCK(B) (* (int *) (B)->aux)
-#define LATCH_EDGE(E) (*(int *) (E)->aux)
+#include "pointer-set.h"
+#include "output.h"
 
 static void flow_loops_cfg_dump (FILE *);
 static void establish_preds (struct loop *);
-static void canonicalize_loop_headers (void);
-static bool glb_enum_p (basic_block, void *);
 
 /* Dump loop related CFG information.  */
 
@@ -90,6 +83,24 @@ superloop_at_depth (struct loop *loop, unsigned depth)
   return loop->pred[depth];
 }
 
+/* Returns the list of the latch edges of LOOP.  */
+
+static VEC (edge, heap) *
+get_loop_latch_edges (const struct loop *loop)
+{
+  edge_iterator ei;
+  edge e;
+  VEC (edge, heap) *ret = NULL;
+
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    {
+      if (dominated_by_p (CDI_DOMINATORS, e->src, loop->header))
+        VEC_safe_push (edge, heap, ret, e);
+    }
+
+  return ret;
+}
+
 /* Dump the loop information specified by LOOP to the stream FILE
    using auxiliary dump callback function LOOP_DUMP_AUX if non null.  */
 
@@ -100,14 +111,27 @@ flow_loop_dump (const struct loop *loop, FILE *file,
 {
   basic_block *bbs;
   unsigned i;
+  VEC (edge, heap) *latches;
+  edge e;
 
   if (! loop || ! loop->header)
    return;
 
   fprintf (file, ";;\n;; Loop %d\n", loop->num);
 
-  fprintf (file, ";; header %d, latch %d\n",
-           loop->header->index, loop->latch->index);
+  fprintf (file, ";; header %d, ", loop->header->index);
+  if (loop->latch)
+    fprintf (file, "latch %d\n", loop->latch->index);
+  else
+    {
+      fprintf (file, "multiple latches:");
+      latches = get_loop_latch_edges (loop);
+      for (i = 0; VEC_iterate (edge, latches, i, e); i++)
+        fprintf (file, " %d", e->src->index);
+      VEC_free (edge, heap, latches);
+      fprintf (file, "\n");
+    }
+
   fprintf (file, ";; depth %d, outer %ld\n",
            loop->depth, (long) (loop->outer ? loop->outer->num : -1));
@@ -198,46 +222,49 @@ flow_loops_free (struct loops *loops)
 int
 flow_loop_nodes_find (basic_block header, struct loop *loop)
 {
-  basic_block *stack;
-  int sp;
+  VEC (basic_block, heap) *stack = NULL;
   int num_nodes = 1;
+  edge latch;
+  edge_iterator latch_ei;
 
   header->loop_father = loop;
   header->loop_depth = loop->depth;
 
-  if (loop->latch->loop_father != loop)
+  FOR_EACH_EDGE (latch, latch_ei, loop->header->preds)
     {
-      stack = XNEWVEC (basic_block, n_basic_blocks);
-      sp = 0;
+      if (latch->src->loop_father == loop
+          || !dominated_by_p (CDI_DOMINATORS, latch->src, loop->header))
+        continue;
+
       num_nodes++;
-
-      stack[sp++] = loop->latch;
-      loop->latch->loop_father = loop;
-      loop->latch->loop_depth = loop->depth;
+      VEC_safe_push (basic_block, heap, stack, latch->src);
+      latch->src->loop_father = loop;
+      latch->src->loop_depth = loop->depth;
 
-      while (sp)
+      while (!VEC_empty (basic_block, stack))
        {
          basic_block node;
          edge e;
          edge_iterator ei;
 
-         node = stack[--sp];
+         node = VEC_pop (basic_block, stack);
 
          FOR_EACH_EDGE (e, ei, node->preds)
            {
              basic_block ancestor = e->src;
 
-             if (ancestor != ENTRY_BLOCK_PTR
-                 && ancestor->loop_father != loop)
+             if (ancestor->loop_father != loop)
                {
                  ancestor->loop_father = loop;
                  ancestor->loop_depth = loop->depth;
                  num_nodes++;
-                 stack[sp++] = ancestor;
+                 VEC_safe_push (basic_block, heap, stack, ancestor);
                }
            }
        }
-      free (stack);
     }
+  VEC_free (basic_block, heap, stack);
+
   return num_nodes;
 }
 
@@ -299,156 +326,6 @@ flow_loop_tree_node_remove (struct loop *loop)
   loop->pred = NULL;
 }
 
-/* A callback to update latch and header info for basic block JUMP created
-   by redirecting an edge.  */
-
-static void
-update_latch_info (basic_block jump)
-{
-  alloc_aux_for_block (jump, sizeof (int));
-  HEADER_BLOCK (jump) = 0;
-  alloc_aux_for_edge (single_pred_edge (jump), sizeof (int));
-  LATCH_EDGE (single_pred_edge (jump)) = 0;
-  set_immediate_dominator (CDI_DOMINATORS, jump, single_pred (jump));
-}
-
-/* A callback for make_forwarder block, to redirect all edges except for
-   MFB_KJ_EDGE to the entry part.  E is the edge for that we should decide
-   whether to redirect it.  */
-
-static edge mfb_kj_edge;
-static bool
-mfb_keep_just (edge e)
-{
-  return e != mfb_kj_edge;
-}
-
-/* A callback for make_forwarder block, to redirect the latch edges into an
-   entry part.  E is the edge for that we should decide whether to redirect
-   it.  */
-
-static bool
-mfb_keep_nonlatch (edge e)
-{
-  return LATCH_EDGE (e);
-}
-
-/* Takes care of merging natural loops with shared headers.  */
-
-static void
-canonicalize_loop_headers (void)
-{
-  basic_block header;
-  edge e;
-
-  alloc_aux_for_blocks (sizeof (int));
-  alloc_aux_for_edges (sizeof (int));
-
-  /* Split blocks so that each loop has only single latch.  */
-  FOR_EACH_BB (header)
-    {
-      edge_iterator ei;
-      int num_latches = 0;
-      int have_abnormal_edge = 0;
-
-      FOR_EACH_EDGE (e, ei, header->preds)
-       {
-         basic_block latch = e->src;
-
-         if (e->flags & EDGE_ABNORMAL)
-           have_abnormal_edge = 1;
-
-         if (latch != ENTRY_BLOCK_PTR
-             && dominated_by_p (CDI_DOMINATORS, latch, header))
-           {
-             num_latches++;
-             LATCH_EDGE (e) = 1;
-           }
-       }
-      if (have_abnormal_edge)
-       HEADER_BLOCK (header) = 0;
-      else
-       HEADER_BLOCK (header) = num_latches;
-    }
-
-  if (HEADER_BLOCK (single_succ (ENTRY_BLOCK_PTR)))
-    {
-      basic_block bb;
-
-      /* We could not redirect edges freely here.  On the other hand,
-        we can simply split the edge from entry block.  */
-      bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
-
-      alloc_aux_for_edge (single_succ_edge (bb), sizeof (int));
-      LATCH_EDGE (single_succ_edge (bb)) = 0;
-      alloc_aux_for_block (bb, sizeof (int));
-      HEADER_BLOCK (bb) = 0;
-    }
-
-  FOR_EACH_BB (header)
-    {
-      int max_freq, is_heavy;
-      edge heavy, tmp_edge;
-      edge_iterator ei;
-
-      if (HEADER_BLOCK (header) <= 1)
-       continue;
-
-      /* Find a heavy edge.  */
-      is_heavy = 1;
-      heavy = NULL;
-      max_freq = 0;
-      FOR_EACH_EDGE (e, ei, header->preds)
-       if (LATCH_EDGE (e) &&
-           EDGE_FREQUENCY (e) > max_freq)
-         max_freq = EDGE_FREQUENCY (e);
-      FOR_EACH_EDGE (e, ei, header->preds)
-       if (LATCH_EDGE (e) &&
-           EDGE_FREQUENCY (e) >= max_freq / HEAVY_EDGE_RATIO)
-         {
-           if (heavy)
-             {
-               is_heavy = 0;
-               break;
-             }
-           else
-             heavy = e;
-         }
-
-      if (is_heavy)
-       {
-         /* Split out the heavy edge, and create inner loop for it.  */
-         mfb_kj_edge = heavy;
-         tmp_edge = make_forwarder_block (header, mfb_keep_just,
-                                          update_latch_info);
-         alloc_aux_for_block (tmp_edge->dest, sizeof (int));
-         HEADER_BLOCK (tmp_edge->dest) = 1;
-         alloc_aux_for_edge (tmp_edge, sizeof (int));
-         LATCH_EDGE (tmp_edge) = 0;
-         HEADER_BLOCK (header)--;
-       }
-
-      if (HEADER_BLOCK (header) > 1)
-       {
-         /* Create a new latch block.  */
-         tmp_edge = make_forwarder_block (header, mfb_keep_nonlatch,
-                                          update_latch_info);
-         alloc_aux_for_block (tmp_edge->dest, sizeof (int));
-         HEADER_BLOCK (tmp_edge->src) = 0;
-         HEADER_BLOCK (tmp_edge->dest) = 1;
-         alloc_aux_for_edge (tmp_edge, sizeof (int));
-         LATCH_EDGE (tmp_edge) = 1;
-       }
-    }
-
-  free_aux_for_blocks ();
-  free_aux_for_edges ();
-
-#ifdef ENABLE_CHECKING
-  verify_dominators (CDI_DOMINATORS);
-#endif
-}
-
 /* Allocates and returns new loop structure.  */
 
 struct loop *
@@ -494,9 +371,6 @@ flow_loops_find (struct loops *loops)
   /* Ensure that the dominators are computed.  */
   calculate_dominance_info (CDI_DOMINATORS);
 
-  /* Join loops with shared headers.  */
-  canonicalize_loop_headers ();
-
   /* Count the number of loop headers.  This should be the same
      as the number of natural loops.  */
   headers = sbitmap_alloc (last_basic_block);
@@ -506,7 +380,6 @@ flow_loops_find (struct loops *loops)
   FOR_EACH_BB (header)
     {
       edge_iterator ei;
-      int more_latches = 0;
 
       header->loop_depth = 0;
@@ -533,8 +406,6 @@ flow_loops_find (struct loops *loops)
              && dominated_by_p (CDI_DOMINATORS, latch, header))
            {
              /* Shared headers should be eliminated by now.  */
-             gcc_assert (!more_latches);
-             more_latches = 1;
              SET_BIT (headers, header->index);
              num_loops++;
            }
@@ -589,21 +460,26 @@ flow_loops_find (struct loops *loops)
          loop->num = num_loops;
          num_loops++;
 
-         /* Look for the latch for this header block.  */
+         flow_loop_tree_node_add (header->loop_father, loop);
+         loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
+
+         /* Look for the latch for this header block, if it has just a
+            single one.  */
          FOR_EACH_EDGE (e, ei, header->preds)
            {
              basic_block latch = e->src;
 
-             if (latch != ENTRY_BLOCK_PTR
-                 && dominated_by_p (CDI_DOMINATORS, latch, header))
+             if (flow_bb_inside_loop_p (loop, latch))
                {
+                 if (loop->latch != NULL)
+                   {
+                     /* More than one latch edge.  */
+                     loop->latch = NULL;
+                     break;
+                   }
                  loop->latch = latch;
-                 break;
                }
            }
-
-         flow_loop_tree_node_add (header->loop_father, loop);
-         loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
        }
 
      free (dfs_order);
@@ -617,6 +493,264 @@ flow_loops_find (struct loops *loops)
   return VEC_length (loop_p, loops->larray);
 }
 
+/* Ratio of frequencies of edges so that one of more latch edges is
+   considered to belong to inner loop with same header.  */
+#define HEAVY_EDGE_RATIO 8
+
+/* Minimum number of samples for that we apply
+   find_subloop_latch_edge_by_profile heuristics.  */
+#define HEAVY_EDGE_MIN_SAMPLES 10
+
+/* If the profile info is available, finds an edge in LATCHES that much more
+   frequent than the remaining edges.  Returns such an edge, or NULL if we do
+   not find one.
+
+   We do not use guessed profile here, only the measured one.  The guessed
+   profile is usually too flat and unreliable for this (and it is mostly based
+   on the loop structure of the program, so it does not make much sense to
+   derive the loop structure from it).  */
+
+static edge
+find_subloop_latch_edge_by_profile (VEC (edge, heap) *latches)
+{
+  unsigned i;
+  edge e, me = NULL;
+  gcov_type mcount = 0, tcount = 0;
+
+  for (i = 0; VEC_iterate (edge, latches, i, e); i++)
+    {
+      if (e->count > mcount)
+        {
+          me = e;
+          mcount = e->count;
+        }
+      tcount += e->count;
+    }
+
+  if (tcount < HEAVY_EDGE_MIN_SAMPLES
+      || (tcount - mcount) * HEAVY_EDGE_RATIO > tcount)
+    return NULL;
+
+  if (dump_file)
+    fprintf (dump_file,
+             "Found latch edge %d -> %d using profile information.\n",
+             me->src->index, me->dest->index);
+  return me;
+}
+
+/* Among LATCHES, guesses a latch edge of LOOP corresponding to subloop, based
+   on the structure of induction variables.  Returns this edge, or NULL if we
+   do not find any.
+
+   We are quite conservative, and look just for an obvious simple innermost
+   loop (which is the case where we would lose the most performance by not
+   disambiguating the loop).  More precisely, we look for the following
+   situation: The source of the chosen latch edge dominates sources of all
+   the other latch edges.  Additionally, the header does not contain a phi node
+   such that the argument from the chosen edge is equal to the argument from
+   another edge.  */
+
+static edge
+find_subloop_latch_edge_by_ivs (struct loop *loop, VEC (edge, heap) *latches)
+{
+  edge e, latch = VEC_index (edge, latches, 0);
+  unsigned i;
+  tree phi, lop;
+  basic_block bb;
+
+  /* Find the candidate for the latch edge.  */
+  for (i = 1; VEC_iterate (edge, latches, i, e); i++)
+    if (dominated_by_p (CDI_DOMINATORS, latch->src, e->src))
+      latch = e;
+
+  /* Verify that it dominates all the latch edges.  */
+  for (i = 0; VEC_iterate (edge, latches, i, e); i++)
+    if (!dominated_by_p (CDI_DOMINATORS, e->src, latch->src))
+      return NULL;
+
+  /* Check for a phi node that would deny that this is a latch edge of
+     a subloop.  */
+  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
+    {
+      lop = PHI_ARG_DEF_FROM_EDGE (phi, latch);
+
+      /* Ignore the values that are not changed inside the subloop.  */
+      if (TREE_CODE (lop) != SSA_NAME
+          || SSA_NAME_DEF_STMT (lop) == phi)
+        continue;
+      bb = bb_for_stmt (SSA_NAME_DEF_STMT (lop));
+      if (!bb || !flow_bb_inside_loop_p (loop, bb))
+        continue;
+
+      for (i = 0; VEC_iterate (edge, latches, i, e); i++)
+        if (e != latch
+            && PHI_ARG_DEF_FROM_EDGE (phi, e) == lop)
+          return NULL;
+    }
+
+  if (dump_file)
+    fprintf (dump_file,
+             "Found latch edge %d -> %d using iv structure.\n",
+             latch->src->index, latch->dest->index);
+  return latch;
+}
+
+/* If we can determine that one of the several latch edges of LOOP behaves
+   as a latch edge of a separate subloop, returns this edge.  Otherwise
+   returns NULL.  */
+
+static edge
+find_subloop_latch_edge (struct loop *loop)
+{
+  VEC (edge, heap) *latches = get_loop_latch_edges (loop);
+  edge latch = NULL;
+
+  if (VEC_length (edge, latches) > 1)
+    {
+      latch = find_subloop_latch_edge_by_profile (latches);
+
+      if (!latch
+          /* We consider ivs to guess the latch edge only in SSA.  Perhaps we
+             should use cfghook for this, but it is hard to imagine it would
+             be useful elsewhere.  */
+          && current_ir_type () == IR_GIMPLE)
+        latch = find_subloop_latch_edge_by_ivs (loop, latches);
+    }
+
+  VEC_free (edge, heap, latches);
+  return latch;
+}
+
+/* Callback for make_forwarder_block.  Returns true if the edge E is marked
+   in the set MFB_REIS_SET.  */
+
+static struct pointer_set_t *mfb_reis_set;
+static bool
+mfb_redirect_edges_in_set (edge e)
+{
+  return pointer_set_contains (mfb_reis_set, e);
+}
+
+/* Creates a subloop of LOOP with latch edge LATCH.  */
+
+static void
+form_subloop (struct loop *loop, edge latch)
+{
+  edge_iterator ei;
+  edge e, new_entry;
+  struct loop *new_loop;
+
+  mfb_reis_set = pointer_set_create ();
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    {
+      if (e != latch)
+        pointer_set_insert (mfb_reis_set, e);
+    }
+  new_entry = make_forwarder_block (loop->header, mfb_redirect_edges_in_set,
+                                    NULL);
+  pointer_set_destroy (mfb_reis_set);
+
+  loop->header = new_entry->src;
+
+  /* Find the blocks and subloops that belong to the new loop, and add it to
+     the appropriate place in the loop tree.  */
+  new_loop = alloc_loop ();
+  new_loop->header = new_entry->dest;
+  new_loop->latch = latch->src;
+  add_loop (new_loop, loop);
+}
+
+/* Make all the latch edges of LOOP to go to a single forwarder block --
+   a new latch of LOOP.  */
+
+static void
+merge_latch_edges (struct loop *loop)
+{
+  VEC (edge, heap) *latches = get_loop_latch_edges (loop);
+  edge latch, e;
+  unsigned i;
+
+  gcc_assert (VEC_length (edge, latches) > 0);
+
+  if (VEC_length (edge, latches) == 1)
+    loop->latch = VEC_index (edge, latches, 0)->src;
+  else
+    {
+      if (dump_file)
+        fprintf (dump_file, "Merged latch edges of loop %d\n", loop->num);
+
+      mfb_reis_set = pointer_set_create ();
+      for (i = 0; VEC_iterate (edge, latches, i, e); i++)
+        pointer_set_insert (mfb_reis_set, e);
+      latch = make_forwarder_block (loop->header, mfb_redirect_edges_in_set,
+                                    NULL);
+      pointer_set_destroy (mfb_reis_set);
+
+      loop->header = latch->dest;
+      loop->latch = latch->src;
+    }
+
+  VEC_free (edge, heap, latches);
+}
+
+/* LOOP may have several latch edges.  Transform it into (possibly several)
+   loops with single latch edge.  */
+
+static void
+disambiguate_multiple_latches (struct loop *loop)
+{
+  edge e;
+
+  /* We eliminate the mutiple latches by splitting the header to the forwarder
+     block F and the rest R, and redirecting the edges.  There are two cases:
+
+     1) If there is a latch edge E that corresponds to a subloop (we guess
+        that based on profile -- if it is taken much more often than the
+        remaining edges; and on trees, using the information about induction
+        variables of the loops), we redirect E to R, all the remaining edges to
+        F, then rescan the loops and try again for the outer loop.
+     2) If there is no such edge, we redirect all latch edges to F, and the
+        entry edges to R, thus making F the single latch of the loop.  */
+
+  if (dump_file)
+    fprintf (dump_file, "Disambiguating loop %d with multiple latches\n",
+             loop->num);
+
+  /* During latch merging, we may need to redirect the entry edges to a new
+     block.  This would cause problems if the entry edge was the one from the
+     entry block.  To avoid having to handle this case specially, split
+     such entry edge.  */
+  e = find_edge (ENTRY_BLOCK_PTR, loop->header);
+  if (e)
+    split_edge (e);
+
+  while (1)
+    {
+      e = find_subloop_latch_edge (loop);
+      if (!e)
+        break;
+
+      form_subloop (loop, e);
+    }
+
+  merge_latch_edges (loop);
+}
+
+/* Split loops with multiple latch edges.  */
+
+void
+disambiguate_loops_with_multiple_latches (void)
+{
+  loop_iterator li;
+  struct loop *loop;
+
+  FOR_EACH_LOOP (li, loop, 0)
+    {
+      if (!loop->latch)
+        disambiguate_multiple_latches (loop);
+    }
+}
+
 /* Return nonzero if basic block BB belongs to LOOP.  */
 bool
 flow_bb_inside_loop_p (const struct loop *loop, const basic_block bb)
@@ -630,44 +764,59 @@ flow_bb_inside_loop_p (const struct loop *loop, const basic_block bb)
   return loop == source_loop || flow_loop_nested_p (loop, source_loop);
 }
 
-/* Enumeration predicate for get_loop_body.  */
+/* Enumeration predicate for get_loop_body_with_size.  */
 
 static bool
-glb_enum_p (basic_block bb, void *glb_header)
+glb_enum_p (basic_block bb, void *glb_loop)
+{
+  struct loop *loop = glb_loop;
+  return (bb != loop->header
+          && dominated_by_p (CDI_DOMINATORS, bb, loop->header));
+}
+
+/* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
+   order against direction of edges from latch.  Specially, if
+   header != latch, latch is the 1-st block.  LOOP cannot be the fake
+   loop tree root, and its size must be at most MAX_SIZE.  The blocks
+   in the LOOP body are stored to BODY, and the size of the LOOP is
+   returned.  */
+
+unsigned
+get_loop_body_with_size (const struct loop *loop, basic_block *body,
+                         unsigned max_size)
 {
-  return bb != (basic_block) glb_header;
+  return dfs_enumerate_from (loop->header, 1, glb_enum_p,
+                             body, max_size, (void *) loop);
 }
 
 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
    order against direction of edges from latch.  Specially, if
   header != latch, latch is the 1-st block.  */
+
 basic_block *
 get_loop_body (const struct loop *loop)
 {
-  basic_block *tovisit, bb;
+  basic_block *body, bb;
   unsigned tv = 0;
 
   gcc_assert (loop->num_nodes);
 
-  tovisit = XCNEWVEC (basic_block, loop->num_nodes);
-  tovisit[tv++] = loop->header;
+  body = XCNEWVEC (basic_block, loop->num_nodes);
+
   if (loop->latch == EXIT_BLOCK_PTR)
    {
-      /* There may be blocks unreachable from EXIT_BLOCK.  */
+      /* There may be blocks unreachable from EXIT_BLOCK, hence we need to
+         special-case the fake loop that contains the whole function.  */
      gcc_assert (loop->num_nodes == (unsigned) n_basic_blocks);
+      body[tv++] = loop->header;
+      body[tv++] = EXIT_BLOCK_PTR;
      FOR_EACH_BB (bb)
-       tovisit[tv++] = bb;
-      tovisit[tv++] = EXIT_BLOCK_PTR;
-    }
-  else if (loop->latch != loop->header)
-    {
-      tv = dfs_enumerate_from (loop->latch, 1, glb_enum_p,
-                               tovisit + 1, loop->num_nodes - 1,
-                               loop->header) + 1;
+       body[tv++] = bb;
    }
+  else
+    tv = get_loop_body_with_size (loop, body, loop->num_nodes);
 
   gcc_assert (tv == loop->num_nodes);
-  return tovisit;
+  return body;
 }
 
 /* Fills dominance descendants inside LOOP of the basic block BB into
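For orientation, here is a minimal sketch (not part of the patch) of how a client inside GCC of this vintage might use the two entry points that this file now exports, disambiguate_loops_with_multiple_latches and get_loop_body_with_size. The function name my_walk_loop_bodies is hypothetical; the real caller of the disambiguation routine is loop_optimizer_init, as listed in the ChangeLog above.

```c
/* Hypothetical sketch, not code from this commit: split loops that
   still have several latch edges, then walk each loop body through
   the new get_loop_body_with_size interface.  Assumes the usual
   cfgloop.h environment of this patch.  */

static void
my_walk_loop_bodies (void)
{
  loop_iterator li;
  struct loop *loop;
  basic_block *body;
  unsigned i, n;

  /* Afterwards every loop->latch is a single block again, so passes
     that rely on a unique latch keep working.  */
  disambiguate_loops_with_multiple_latches ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      body = XCNEWVEC (basic_block, loop->num_nodes);
      n = get_loop_body_with_size (loop, body, loop->num_nodes);

      for (i = 0; i < n; i++)
        {
          /* ... process body[i] ...  */
        }

      free (body);
    }
}
```

The max_size argument is what distinguishes get_loop_body_with_size from the older get_loop_body: the caller supplies the output array and an upper bound, and the function returns the number of blocks actually enumerated.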