author     Segher Boessenkool <segher@kernel.crashing.org>  2015-09-15 02:38:21 +0200
committer  Segher Boessenkool <segher@gcc.gnu.org>  2015-09-15 02:38:21 +0200
commit     23997c53b8f7696d6ba484913835b19287f96f82 (patch)
tree       d9a541717d4670d10789ddcd410d796e10b6f89c
parent     311adabec523e8ad99930cd2e24716098ed77fc7 (diff)
shrink-wrap: Rewrite
This patch rewrites the shrink-wrapping algorithm, allowing non-linear
pieces of the CFG to be duplicated for use without a prologue, instead
of just linear pieces.

	* shrink-wrap.c (requires_stack_frame_p): Fix formatting.
	(dup_block_and_redirect): Delete function.
	(can_dup_for_shrink_wrapping): New function.
	(fix_fake_fallthrough_edge): New function.
	(try_shrink_wrapping): Rewrite function.
	(convert_to_simple_return): Call fix_fake_fallthrough_edge.

From-SVN: r227775
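A minimal sketch of the situation being optimized, assuming a hypothetical
function foo with callee bar (neither is from this patch): only the path
that makes a call needs a stack frame, so the prologue can be sunk onto
that path, and the early-return path can exit through a simple_return
without ever setting a frame up.

    /* Illustration only, not from this patch.  Only the path that
       calls bar needs a frame; after shrink-wrapping, the x == 0
       path returns without executing the prologue.  */

    extern int bar (int);

    int
    foo (int x)
    {
      if (x == 0)
        return 1;            /* exits via simple_return, no prologue */
      return bar (x) + 1;    /* prologue runs only on this path */
    }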
-rw-r--r--  gcc/ChangeLog        9
-rw-r--r--  gcc/shrink-wrap.c  788
2 files changed, 431 insertions(+), 366 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f14f5b2..328675a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-09-14  Segher Boessenkool  <segher@kernel.crashing.org>
+
+ * shrink-wrap.c (requires_stack_frame_p): Fix formatting.
+ (dup_block_and_redirect): Delete function.
+ (can_dup_for_shrink_wrapping): New function.
+ (fix_fake_fallthrough_edge): New function.
+ (try_shrink_wrapping): Rewrite function.
+ (convert_to_simple_return): Call fix_fake_fallthrough_edge.
+
2015-09-14  Rich Felker  <dalias@libc.org>
* configure.ac: Change target pattern for sh TLS support
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index d10795a..1387594 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -91,8 +91,7 @@ requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
if (!REG_P (dreg))
continue;
- add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
- REGNO (dreg));
+ add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
}
if (hard_reg_set_intersect_p (hardregs, prologue_used))
return true;
@@ -463,414 +462,469 @@ prepare_shrink_wrap (basic_block entry_block)
}
}
-/* Create a copy of BB instructions and insert at BEFORE. Redirect
- preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */
-static void
-dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before,
- bitmap_head *need_prologue)
+/* Return whether we can duplicate basic block BB for shrink wrapping. We
+ cannot if the block cannot be duplicated at all, or if any of its incoming
+ edges are complex and come from a block that does not require a prologue
+ (we cannot redirect such edges), or if the block is too big to copy.
+ PRO is the basic block before which we would put the prologue, MAX_SIZE is
+ the maximum size block we allow to be copied. */
+
+static bool
+can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
{
- edge_iterator ei;
- edge e;
- rtx_insn *insn = BB_END (bb);
+ if (!can_duplicate_block_p (bb))
+ return false;
- /* We know BB has a single successor, so there is no need to copy a
- simple jump at the end of BB. */
- if (simplejump_p (insn))
- insn = PREV_INSN (insn);
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (e->flags & EDGE_COMPLEX
+ && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
+ return false;
- start_sequence ();
- duplicate_insn_chain (BB_HEAD (bb), insn);
- if (dump_file)
- {
- unsigned count = 0;
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- if (active_insn_p (insn))
- ++count;
- fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
- bb->index, copy_bb->index, count);
- }
- insn = get_insns ();
- end_sequence ();
- emit_insn_before (insn, before);
+ unsigned size = 0;
- /* Redirect all the paths that need no prologue into copy_bb. */
- for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
- if (!bitmap_bit_p (need_prologue, e->src->index))
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn))
{
- int freq = EDGE_FREQUENCY (e);
- copy_bb->count += e->count;
- copy_bb->frequency += EDGE_FREQUENCY (e);
- e->dest->count -= e->count;
- if (e->dest->count < 0)
- e->dest->count = 0;
- e->dest->frequency -= freq;
- if (e->dest->frequency < 0)
- e->dest->frequency = 0;
- redirect_edge_and_branch_force (e, copy_bb);
- continue;
+ size += get_attr_min_length (insn);
+ if (size > max_size)
+ return false;
}
- else
- ei_next (&ei);
+
+ return true;
}
+/* If the source of edge E has more than one successor, the verifier for
+ branch probabilities gets confused by the fake edges we make where
+ simple_return statements will be inserted later (because those are not
+ marked as fallthrough edges). Fix this by creating an extra block just
+ for that fallthrough. */
+
+static edge
+fix_fake_fallthrough_edge (edge e)
+{
+ if (EDGE_COUNT (e->src->succs) <= 1)
+ return e;
+
+ basic_block old_bb = e->src;
+ rtx_insn *end = BB_END (old_bb);
+ rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
+ basic_block new_bb = create_basic_block (note, note, old_bb);
+ BB_COPY_PARTITION (new_bb, old_bb);
+ BB_END (old_bb) = end;
+
+ redirect_edge_succ (e, new_bb);
+ e->flags |= EDGE_FALLTHRU;
+ e->flags &= ~EDGE_FAKE;
+
+ return make_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), EDGE_FAKE);
+}
/* Try to perform a kind of shrink-wrapping, making sure the
prologue/epilogue is emitted only around those parts of the
- function that require it. */
+ function that require it.
+
+ There will be exactly one prologue, and it will be executed either
+ zero or one time, on any path. Depending on where the prologue is
+ placed, some of the basic blocks can be reached via both paths with
+ and without a prologue. Such blocks will be duplicated here, and the
+ edges changed to match.
+
+ Paths that go to the exit without going through the prologue will use
+ a simple_return instead of the epilogue. We maximize the number of
+ those, making sure to only duplicate blocks that can be duplicated.
+ If the prologue can then still be placed in multiple locations, we
+ place it as early as possible.
+
+ An example, where we duplicate blocks with control flow (legend:
+ _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
+ be taken to point down or to the right, to simplify the diagram; here,
+ block 3 needs a prologue, the rest does not):
+
+
+     B                 B
+     |                 |
+     2                 2
+     |\                |\
+     | 3    becomes    | 3
+     |/                |  \
+     4                 7   4
+     |\                |\  |\
+     | 5               | 8 | 5
+     |/                |/  |/
+     6                 9   6
+     |                 |   |
+     R                 S   R
+
+
+ (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
+ edge 2->3).
+
+ Another example, where part of a loop is duplicated (again, bb 3 is
+ the only block that needs a prologue):
+
+
+     B   3<--            B      ->3<--
+     |   |  |            |     |  |  |
+     |   v  |  becomes   |     |  v  |
+     2---4---            2---5--  4---
+         |                   |    |
+         R                   S    R
+
+
+ (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
+
+ ENTRY_EDGE is the edge where the prologue will be placed, possibly
+ changed by this function. ORIG_ENTRY_EDGE is the edge where it
+ would be placed without shrink-wrapping. BB_WITH is a bitmap that,
+ if we do shrink-wrap, will on return contain the interesting blocks
+ that run with prologue. PROLOGUE_SEQ is the prologue we will insert. */
void
try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge,
- bitmap_head *bb_flags, rtx_insn *prologue_seq)
+ bitmap_head *bb_with, rtx_insn *prologue_seq)
{
- edge e;
- edge_iterator ei;
- bool nonempty_prologue = false;
- unsigned max_grow_size;
- rtx_insn *seq;
+ /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
+ no sense to shrink-wrap: then do not shrink-wrap! */
+
+ if (!SHRINK_WRAPPING_ENABLED)
+ return;
+
+ if (crtl->profile && !targetm.profile_before_prologue ())
+ return;
- for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
- if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
+ if (crtl->calls_eh_return)
+ return;
+
+ bool empty_prologue = true;
+ for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
+ if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
{
- nonempty_prologue = true;
+ empty_prologue = false;
break;
}
+ if (empty_prologue)
+ return;
+
+ /* Move some code down to expose more shrink-wrapping opportunities. */
+
+ basic_block entry = (*entry_edge)->dest;
+ prepare_shrink_wrap (entry);
+
+ if (dump_file)
+ fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
+
+ /* Compute the registers set and used in the prologue. */
+
+ HARD_REG_SET prologue_clobbered, prologue_used;
+ CLEAR_HARD_REG_SET (prologue_clobbered);
+ CLEAR_HARD_REG_SET (prologue_used);
+ for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
+ if (NONDEBUG_INSN_P (insn))
+ {
+ HARD_REG_SET this_used;
+ CLEAR_HARD_REG_SET (this_used);
+ note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
+ AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
+ IOR_HARD_REG_SET (prologue_used, this_used);
+ note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
+ }
- if (SHRINK_WRAPPING_ENABLED
- && (targetm.profile_before_prologue () || !crtl->profile)
- && nonempty_prologue && !crtl->calls_eh_return)
+ /* Find out what registers are set up by the prologue; any use of these
+ cannot happen before the prologue. */
+
+ struct hard_reg_set_container set_up_by_prologue;
+ CLEAR_HARD_REG_SET (set_up_by_prologue.set);
+ add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
+ add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
+ if (frame_pointer_needed)
+ add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ if (pic_offset_table_rtx
+ && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
+ add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
+ PIC_OFFSET_TABLE_REGNUM);
+ if (crtl->drap_reg)
+ add_to_hard_reg_set (&set_up_by_prologue.set,
+ GET_MODE (crtl->drap_reg),
+ REGNO (crtl->drap_reg));
+ if (targetm.set_up_by_prologue)
+ targetm.set_up_by_prologue (&set_up_by_prologue);
+
+ /* We will insert the prologue before the basic block PRO. PRO should
+ dominate all basic blocks that need the prologue to be executed
+ before them. First, make PRO the "tightest wrap" possible. */
+
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ basic_block pro = 0;
+
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_BB_FN (bb, cfun)
{
- HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
- struct hard_reg_set_container set_up_by_prologue;
- rtx_insn *p_insn;
- vec<basic_block> vec;
- basic_block bb;
- bitmap_head bb_antic_flags;
- bitmap_head bb_on_list;
- bitmap_head bb_tail;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn)
+ && requires_stack_frame_p (insn, prologue_used,
+ set_up_by_prologue.set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
+ pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
+ break;
+ }
+ }
+ /* If nothing needs a prologue, just put it at the start. This really
+ shouldn't happen, but we cannot fix it here. */
+
+ if (pro == 0)
+ {
if (dump_file)
- fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
+ fprintf (dump_file, "Nothing needs a prologue, but it isn't empty; "
+ "putting it at the start.\n");
+ pro = entry;
+ }
- /* Compute the registers set and used in the prologue. */
- CLEAR_HARD_REG_SET (prologue_clobbered);
- CLEAR_HARD_REG_SET (prologue_used);
- for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
- {
- HARD_REG_SET this_used;
- if (!NONDEBUG_INSN_P (p_insn))
- continue;
-
- CLEAR_HARD_REG_SET (this_used);
- note_uses (&PATTERN (p_insn), record_hard_reg_uses,
- &this_used);
- AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
- IOR_HARD_REG_SET (prologue_used, this_used);
- note_stores (PATTERN (p_insn), record_hard_reg_sets,
- &prologue_clobbered);
- }
+ if (dump_file)
+ fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
+ pro->index);
- prepare_shrink_wrap ((*entry_edge)->dest);
-
- bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
- bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
- bitmap_initialize (&bb_tail, &bitmap_default_obstack);
-
- /* Find the set of basic blocks that require a stack frame,
- and blocks that are too big to be duplicated. */
-
- vec.create (n_basic_blocks_for_fn (cfun));
-
- CLEAR_HARD_REG_SET (set_up_by_prologue.set);
- add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
- STACK_POINTER_REGNUM);
- add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
- if (frame_pointer_needed)
- add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
- HARD_FRAME_POINTER_REGNUM);
- if (pic_offset_table_rtx
- && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
- add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
- PIC_OFFSET_TABLE_REGNUM);
- if (crtl->drap_reg)
- add_to_hard_reg_set (&set_up_by_prologue.set,
- GET_MODE (crtl->drap_reg),
- REGNO (crtl->drap_reg));
- if (targetm.set_up_by_prologue)
- targetm.set_up_by_prologue (&set_up_by_prologue);
-
- /* We don't use a different max size depending on
- optimize_bb_for_speed_p because increasing shrink-wrapping
- opportunities by duplicating tail blocks can actually result
- in an overall decrease in code size. */
- max_grow_size = get_uncond_jump_length ();
- max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- rtx_insn *insn;
- unsigned size = 0;
+ /* Now see if we can put the prologue at the start of PRO. Putting it
+ there might require duplicating a block that cannot be duplicated;
+ if so, try again with the immediate dominator of PRO, and so on.
- FOR_BB_INSNS (bb, insn)
- if (NONDEBUG_INSN_P (insn))
- {
- if (requires_stack_frame_p (insn, prologue_used,
- set_up_by_prologue.set))
- {
- if (bb == (*entry_edge)->dest)
- goto fail_shrinkwrap;
- bitmap_set_bit (bb_flags, bb->index);
- vec.quick_push (bb);
- break;
- }
- else if (size <= max_grow_size)
- {
- size += get_attr_min_length (insn);
- if (size > max_grow_size)
- bitmap_set_bit (&bb_on_list, bb->index);
- }
- }
- }
+ The blocks that need duplicating are those reachable from PRO but
+ not dominated by it. We keep in BB_WITH a bitmap of the blocks
+ reachable from PRO that we already found, and in VEC a stack of
+ those we still need to consider (to find successors). */
- /* Blocks that really need a prologue, or are too big for tails. */
- bitmap_ior_into (&bb_on_list, bb_flags);
+ bitmap_set_bit (bb_with, pro->index);
- /* For every basic block that needs a prologue, mark all blocks
- reachable from it, so as to ensure they are also seen as
- requiring a prologue. */
- while (!vec.is_empty ())
- {
- basic_block tmp_bb = vec.pop ();
+ vec<basic_block> vec;
+ vec.create (n_basic_blocks_for_fn (cfun));
+ vec.quick_push (pro);
- FOR_EACH_EDGE (e, ei, tmp_bb->succs)
- if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
- && bitmap_set_bit (bb_flags, e->dest->index))
- vec.quick_push (e->dest);
- }
+ unsigned max_grow_size = get_uncond_jump_length ();
+ max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
- /* Find the set of basic blocks that need no prologue, have a
- single successor, can be duplicated, meet a max size
- requirement, and go to the exit via like blocks. */
- vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun));
- while (!vec.is_empty ())
- {
- basic_block tmp_bb = vec.pop ();
+ while (!vec.is_empty () && pro != entry)
+ {
+ basic_block bb = vec.pop ();
+ if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
+ while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
+ {
+ gcc_assert (pro != entry);
- FOR_EACH_EDGE (e, ei, tmp_bb->preds)
- if (single_succ_p (e->src)
- && !bitmap_bit_p (&bb_on_list, e->src->index)
- && can_duplicate_block_p (e->src))
- {
- edge pe;
- edge_iterator pei;
-
- /* If there is predecessor of e->src which doesn't
- need prologue and the edge is complex,
- we might not be able to redirect the branch
- to a copy of e->src. */
- FOR_EACH_EDGE (pe, pei, e->src->preds)
- if ((pe->flags & EDGE_COMPLEX) != 0
- && !bitmap_bit_p (bb_flags, pe->src->index))
- break;
- if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
- vec.quick_push (e->src);
- }
- }
+ pro = get_immediate_dominator (CDI_DOMINATORS, pro);
+
+ bitmap_set_bit (bb_with, pro->index);
+ vec.quick_push (pro);
+ }
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
+ && bitmap_set_bit (bb_with, e->dest->index))
+ vec.quick_push (e->dest);
+ }
+
+ vec.release ();
+
+ if (dump_file)
+ fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
+ pro->index);
+
+ /* If we can move PRO back without having to duplicate more blocks, do so.
+ We can move back to a block PRE if every path from PRE will eventually
+ need a prologue, that is, PRO is a post-dominator of PRE. */
+
+ if (pro != entry)
+ {
+ calculate_dominance_info (CDI_POST_DOMINATORS);
- /* Now walk backwards from every block that is marked as needing
- a prologue to compute the bb_antic_flags bitmap. Exclude
- tail blocks; They can be duplicated to be used on paths not
- needing a prologue. */
- bitmap_clear (&bb_on_list);
- bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail);
- FOR_EACH_BB_FN (bb, cfun)
+ while (pro != entry)
{
- if (!bitmap_bit_p (&bb_antic_flags, bb->index))
- continue;
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
- && bitmap_set_bit (&bb_on_list, e->src->index))
- vec.quick_push (e->src);
+ basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
+ if (dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
+ pro = pre;
+ else
+ break;
}
- while (!vec.is_empty ())
- {
- basic_block tmp_bb = vec.pop ();
- bool all_set = true;
- bitmap_clear_bit (&bb_on_list, tmp_bb->index);
- FOR_EACH_EDGE (e, ei, tmp_bb->succs)
- if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
- {
- all_set = false;
- break;
- }
+ free_dominance_info (CDI_POST_DOMINATORS);
+ }
- if (all_set)
- {
- bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
- FOR_EACH_EDGE (e, ei, tmp_bb->preds)
- if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
- && bitmap_set_bit (&bb_on_list, e->src->index))
- vec.quick_push (e->src);
- }
- }
- /* Find exactly one edge that leads to a block in ANTIC from
- a block that isn't. */
- if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index))
- FOR_EACH_BB_FN (bb, cfun)
+ if (dump_file)
+ fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
+ pro->index);
+
+ /* If there is more than one predecessor of PRO not dominated by PRO, fail.
+ Also find that single edge that leads to PRO. */
+
+ bool multi = false;
+ edge the_edge = 0;
+ FOR_EACH_EDGE (e, ei, pro->preds)
+ if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
+ {
+ if (the_edge)
+ multi = true;
+ else
+ the_edge = e;
+ }
+
+ if (multi)
+ {
+ the_edge = orig_entry_edge;
+
+ if (dump_file)
+ fprintf (dump_file, "More than one candidate edge.\n");
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "Found candidate edge for shrink-wrapping, %d->%d.\n",
+ the_edge->src->index, the_edge->dest->index);
+
+ *entry_edge = the_edge;
+
+ /* Compute what fraction of the frequency and count of the blocks that run
+ both with and without prologue are for running with prologue. This gives
+ the correct answer for reducible flow graphs; for irreducible flow graphs
+ our profile is messed up beyond repair anyway. */
+
+ int num = (*entry_edge)->probability;
+ int den = REG_BR_PROB_BASE;
+
+ if (*entry_edge == orig_entry_edge)
+ goto out;
+
+ /* Test whether the prologue is known to clobber any register
+ (other than FP or SP) which are live on the edge. */
+
+ HARD_REG_SET live_on_edge;
+ CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
+ if (frame_pointer_needed)
+ CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
+ REG_SET_TO_HARD_REG_SET (live_on_edge,
+ df_get_live_in ((*entry_edge)->dest));
+ if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+ {
+ *entry_edge = orig_entry_edge;
+ if (dump_file)
+ fprintf (dump_file,
+ "Shrink-wrapping aborted due to clobber.\n");
+ goto out;
+ }
+
+ /* All is okay, so do it. */
+
+ crtl->shrink_wrapped = true;
+ if (dump_file)
+ fprintf (dump_file, "Performing shrink-wrapping.\n");
+
+ /* Copy the blocks that can run both with and without prologue. The
+ originals run with prologue, the copies without. Store a pointer to
+ the copy in the ->aux field of the original. */
+
+ FOR_EACH_BB_FN (bb, cfun)
+ if (bitmap_bit_p (bb_with, bb->index)
+ && !dominated_by_p (CDI_DOMINATORS, bb, pro))
+ {
+ basic_block dup = duplicate_block (bb, 0, 0);
+
+ bb->aux = dup;
+
+ if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
+ emit_barrier_after_bb (dup);
+
+ if (EDGE_COUNT (dup->succs) == 0)
+ emit_barrier_after_bb (dup);
+
+ if (dump_file)
+ fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
+
+ bb->frequency = RDIV (num * bb->frequency, den);
+ dup->frequency -= bb->frequency;
+ bb->count = RDIV (num * bb->count, den);
+ dup->count -= bb->count;
+ }
+
+ /* Change ENTRY_EDGE, if its src is duplicated. Do this first, before
+ the redirects have had a chance to create new blocks on the edge we
+ want to use for the prologue, which makes us not find it. */
+
+ gcc_assert (!dominated_by_p (CDI_DOMINATORS, (*entry_edge)->src, pro));
+
+ if (bitmap_bit_p (bb_with, (*entry_edge)->src->index))
+ {
+ basic_block src = (basic_block) (*entry_edge)->src->aux;
+ FOR_EACH_EDGE (e, ei, src->succs)
+ if (e->dest == pro)
+ *entry_edge = e;
+ }
+
+ /* Now change the edges to point to the copies, where appropriate. */
+
+ FOR_EACH_BB_FN (bb, cfun)
+ if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
+ {
+ basic_block src = bb;
+ if (bitmap_bit_p (bb_with, bb->index))
+ src = (basic_block) bb->aux;
+
+ FOR_EACH_EDGE (e, ei, src->succs)
{
- if (!bitmap_bit_p (&bb_antic_flags, bb->index))
+ if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
continue;
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
- {
- if (*entry_edge != orig_entry_edge)
- {
- *entry_edge = orig_entry_edge;
- if (dump_file)
- fprintf (dump_file, "More than one candidate edge.\n");
- goto fail_shrinkwrap;
- }
- if (dump_file)
- fprintf (dump_file, "Found candidate edge for "
- "shrink-wrapping, %d->%d.\n", e->src->index,
- e->dest->index);
- *entry_edge = e;
- }
- }
- if (*entry_edge != orig_entry_edge)
- {
- /* Test whether the prologue is known to clobber any register
- (other than FP or SP) which are live on the edge. */
- CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
- if (frame_pointer_needed)
- CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
- REG_SET_TO_HARD_REG_SET (live_on_edge,
- df_get_live_in ((*entry_edge)->dest));
- if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
- {
- *entry_edge = orig_entry_edge;
- if (dump_file)
- fprintf (dump_file,
- "Shrink-wrapping aborted due to clobber.\n");
- }
- }
- if (*entry_edge != orig_entry_edge)
- {
- crtl->shrink_wrapped = true;
- if (dump_file)
- fprintf (dump_file, "Performing shrink-wrapping.\n");
-
- /* Find tail blocks reachable from both blocks needing a
- prologue and blocks not needing a prologue. */
- if (!bitmap_empty_p (&bb_tail))
- FOR_EACH_BB_FN (bb, cfun)
+ if (bitmap_bit_p (bb_with, e->dest->index)
+ && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
{
- bool some_pro, some_no_pro;
- if (!bitmap_bit_p (&bb_tail, bb->index))
- continue;
- some_pro = some_no_pro = false;
- FOR_EACH_EDGE (e, ei, bb->preds)
- {
- if (bitmap_bit_p (bb_flags, e->src->index))
- some_pro = true;
- else
- some_no_pro = true;
- }
- if (some_pro && some_no_pro)
- vec.quick_push (bb);
- else
- bitmap_clear_bit (&bb_tail, bb->index);
+ if (dump_file)
+ fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
+ e->src->index, e->dest->index,
+ ((basic_block) e->dest->aux)->index);
+ redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
}
- /* Find the head of each tail. */
- while (!vec.is_empty ())
- {
- basic_block tbb = vec.pop ();
+ else if (e->flags & EDGE_FALLTHRU
+ && bitmap_bit_p (bb_with, bb->index))
+ force_nonfallthru (e);
+ }
+ }
- if (!bitmap_bit_p (&bb_tail, tbb->index))
- continue;
+ /* Also redirect the function entry edge if necessary. */
- while (single_succ_p (tbb))
- {
- tbb = single_succ (tbb);
- bitmap_clear_bit (&bb_tail, tbb->index);
- }
- }
- /* Now duplicate the tails. */
- if (!bitmap_empty_p (&bb_tail))
- FOR_EACH_BB_REVERSE_FN (bb, cfun)
- {
- basic_block copy_bb, tbb;
- int eflags;
-
- if (!bitmap_clear_bit (&bb_tail, bb->index))
- continue;
-
- /* Create a copy of BB, instructions and all, for
- use on paths that don't need a prologue.
- Ideal placement of the copy is on a fall-thru edge
- or after a block that would jump to the copy. */
- FOR_EACH_EDGE (e, ei, bb->preds)
- if (!bitmap_bit_p (bb_flags, e->src->index)
- && single_succ_p (e->src))
- break;
- if (e)
- {
- /* Make sure we insert after any barriers. */
- rtx_insn *end = get_last_bb_insn (e->src);
- copy_bb = create_basic_block (NEXT_INSN (end),
- NULL_RTX, e->src);
- BB_COPY_PARTITION (copy_bb, e->src);
- }
- else
- {
- /* Otherwise put the copy at the end of the function. */
- copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
- EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
- BB_COPY_PARTITION (copy_bb, bb);
- }
-
- rtx_note *insert_point = emit_note_after (NOTE_INSN_DELETED,
- BB_END (copy_bb));
- emit_barrier_after (BB_END (copy_bb));
-
- tbb = bb;
- while (1)
- {
- dup_block_and_redirect (tbb, copy_bb, insert_point,
- bb_flags);
- tbb = single_succ (tbb);
- if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
- break;
- e = split_block (copy_bb, PREV_INSN (insert_point));
- copy_bb = e->dest;
- }
-
- /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
- We have yet to add a simple_return to the tails,
- as we'd like to first convert_jumps_to_returns in
- case the block is no longer used after that. */
- eflags = EDGE_FAKE;
- if (CALL_P (PREV_INSN (insert_point))
- && SIBLING_CALL_P (PREV_INSN (insert_point)))
- eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
- make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun),
- eflags);
-
- /* verify_flow_info doesn't like a note after a
- sibling call. */
- delete_insn (insert_point);
- if (bitmap_empty_p (&bb_tail))
- break;
- }
- }
+ FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
+ if (bitmap_bit_p (bb_with, e->dest->index)
+ && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
+ {
+ basic_block split_bb = split_edge (e);
+ e = single_succ_edge (split_bb);
+ redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
+ }
- fail_shrinkwrap:
- bitmap_clear (&bb_tail);
- bitmap_clear (&bb_antic_flags);
- bitmap_clear (&bb_on_list);
- vec.release ();
- }
+ /* Change all the exits that should get a simple_return to FAKE.
+ They will be converted later. */
+
+ FOR_EACH_BB_FN (bb, cfun)
+ if (!bitmap_bit_p (bb_with, bb->index))
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
+ {
+ e = fix_fake_fallthrough_edge (e);
+
+ e->flags &= ~EDGE_FALLTHRU;
+ if (!(e->flags & EDGE_SIBCALL))
+ e->flags |= EDGE_FAKE;
+
+ emit_barrier_after_bb (e->src);
+ }
+
+out:
+ free_dominance_info (CDI_DOMINATORS);
}
/* If we're allowed to generate a simple return instruction, then by
@@ -1018,6 +1072,8 @@ convert_to_simple_return (edge entry_edge, edge orig_entry_edge,
&& (e->flags & EDGE_FAKE) != 0
&& !bitmap_bit_p (&bb_flags, e->src->index))
{
+ e = fix_fake_fallthrough_edge (e);
+
emit_return_into_block (true, e->src);
e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
}