diff options
author | Bernd Schmidt <bernds@codesourcery.com> | 2010-12-14 00:23:40 +0000 |
---|---|---|
committer | Bernd Schmidt <bernds@gcc.gnu.org> | 2010-12-14 00:23:40 +0000 |
commit | 4ec5d4f5b9a41a337e768260b8df23ab2cbc1dbb (patch) | |
tree | 7224059fe740ae1327ffaffd7f83fc7d51dee038 /gcc | |
parent | 218cbe3757e7268d53966a2fe8d71acd1569a77c (diff) | |
download | gcc-4ec5d4f5b9a41a337e768260b8df23ab2cbc1dbb.zip gcc-4ec5d4f5b9a41a337e768260b8df23ab2cbc1dbb.tar.gz gcc-4ec5d4f5b9a41a337e768260b8df23ab2cbc1dbb.tar.bz2 |
re PR rtl-optimization/44374 (Hoist same instructions in different branches)
gcc/
PR rtl-optimization/44374
Reapply patch with fixes.
* basic-block.h (enum bb_flags): Add BB_MODIFIED.
* df-core.c (df_set_bb_dirty): Set it.
* ifcvt.c (find_memory): Remove function.
(dead_or_predicable): Use can_move_insns_across.
* df.h (can_move_insns_across): Declare function.
* cfgcleanup.c (block_was_dirty): New static variable.
(flow_find_head_matching_sequence): Test for epilogue notes.
(try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather
than df_get_bb_dirty.
(try_head_merge_bb): New static function.
(try_optimize_cfg): Call it. Call df_analyze if block_was_dirty
is set.
* df-problems.c: Include "target.h"
(df_simulate_find_uses): New static function.
(MEMREF_NORMAL, MEMREF_VOLATILE): New macros.
(find_memory, find_memory_store): New static functions.
(can_move_insns_across): New function.
* Makefile.in (df-problems.o): Update dependencies.
gcc/testsuite/
PR rtl-optimization/44374
Reapply patch with fixes.
* gcc.target/arm/headmerge-1.c: New test.
* gcc.target/arm/headmerge-2.c: New test.
* gcc.target/i386/headmerge-1.c: New test.
* gcc.target/i386/headmerge-2.c: New test.
From-SVN: r167779
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 23 | ||||
-rw-r--r-- | gcc/Makefile.in | 2 | ||||
-rw-r--r-- | gcc/basic-block.h | 8 | ||||
-rw-r--r-- | gcc/cfgcleanup.c | 341 | ||||
-rw-r--r-- | gcc/df-core.c | 1 | ||||
-rw-r--r-- | gcc/df-problems.c | 318 | ||||
-rw-r--r-- | gcc/df.h | 4 | ||||
-rw-r--r-- | gcc/ifcvt.c | 150 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/headmerge-1.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/headmerge-2.c | 35 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/headmerge-1.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/headmerge-2.c | 35 |
13 files changed, 817 insertions, 137 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c832c34..8288373 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2010-12-14 Bernd Schmidt <bernds@codesourcery.com> + + PR rtl-optimization/44374 + Reapply patch with fixes. + * basic-block.h (enum bb_flags): Add BB_MODIFIED. + * df-core.c (df_set_bb_dirty): Set it. + * ifcvt.c (find_memory): Remove function. + (dead_or_predicable): Use can_move_insns_across. + * df.h (can_move_insns_across): Declare function. + * cfgcleanup.c (block_was_dirty): New static variable. + (flow_find_head_matching_sequence): Test for epilogue notes. + (try_crossjump_bb, try_forward_edges): Test BB_MODIFIED flag rather + than df_get_bb_dirty. + (try_head_merge_bb): New static function. + (try_optimize_cfg): Call it. Call df_analyze if block_was_dirty + is set. + * df-problems.c: Include "target.h" + (df_simulate_find_uses): New static function. + (MEMREF_NORMAL, MEMREF_VOLATILE): New macros. + (find_memory, find_memory_store): New static functions. + (can_move_insns_across): New function. + * Makefile.in (df-problems.o): Update dependencies. + 2010-12-13 Joseph Myers <joseph@codesourcery.com> * config/xtensa/elf.h (SIZE_TYPE, PTRDIFF_TYPE): Define. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index ae195e3..f031630 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3194,7 +3194,7 @@ df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ df-problems.o : df-problems.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \ hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \ - $(TM_P_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h + $(TM_P_H) $(TARGET_H) $(FLAGS_H) output.h $(EXCEPT_H) dce.h vecprim.h df-scan.o : df-scan.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \ hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \ diff --git a/gcc/basic-block.h b/gcc/basic-block.h index be0a1d1..3594eea 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -245,7 +245,13 @@ enum bb_flags /* Set on blocks that cannot be threaded through. Only used in cfgcleanup.c. */ - BB_NONTHREADABLE_BLOCK = 1 << 11 + BB_NONTHREADABLE_BLOCK = 1 << 11, + + /* Set on blocks that were modified in some way. This bit is set in + df_set_bb_dirty, but not cleared by df_analyze, so it can be used + to test whether a block has been modified prior to a df_analyze + call. */ + BB_MODIFIED = 1 << 12 }; /* Dummy flag for convenience in the hot/cold partitioning code. */ diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index 78635d2..bf6ca45 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -65,6 +65,10 @@ static bool first_pass; /* Set to true if crossjumps occured in the latest run of try_optimize_cfg. */ static bool crossjumps_occured; +/* Set to true if we couldn't run an optimization due to stale liveness + information; we should run df_analyze to enable more opportunities. */ +static bool block_was_dirty; + static bool try_crossjump_to_edge (int, edge, edge); static bool try_crossjump_bb (int, basic_block); static bool outgoing_edges_match (int, basic_block, basic_block); @@ -431,7 +435,7 @@ try_forward_edges (int mode, basic_block b) int counter, goto_locus; bool threaded = false; int nthreaded_edges = 0; - bool may_thread = first_pass | df_get_bb_dirty (b); + bool may_thread = first_pass || (b->flags & BB_MODIFIED) != 0; /* Skip complex edges because we don't know how to update them. @@ -466,7 +470,7 @@ try_forward_edges (int mode, basic_block b) { basic_block new_target = NULL; bool new_target_threaded = false; - may_thread |= df_get_bb_dirty (target); + may_thread |= (target->flags & BB_MODIFIED) != 0; if (FORWARDER_BLOCK_P (target) && !(single_succ_edge (target)->flags & EDGE_CROSSING) @@ -1184,12 +1188,20 @@ flow_find_head_matching_sequence (basic_block bb1, basic_block bb2, rtx *f1, while (true) { - /* Ignore notes. */ + /* Ignore notes, except NOTE_INSN_EPILOGUE_BEG. */ while (!NONDEBUG_INSN_P (i1) && i1 != BB_END (bb1)) - i1 = NEXT_INSN (i1); + { + if (NOTE_P (i1) && NOTE_KIND (i1) == NOTE_INSN_EPILOGUE_BEG) + break; + i1 = NEXT_INSN (i1); + } while (!NONDEBUG_INSN_P (i2) && i2 != BB_END (bb2)) - i2 = NEXT_INSN (i2); + { + if (NOTE_P (i2) && NOTE_KIND (i2) == NOTE_INSN_EPILOGUE_BEG) + break; + i2 = NEXT_INSN (i2); + } if ((i1 == BB_END (bb1) && !NONDEBUG_INSN_P (i1)) || (i2 == BB_END (bb2) && !NONDEBUG_INSN_P (i2))) @@ -1856,8 +1868,8 @@ try_crossjump_bb (int mode, basic_block bb) /* If nothing changed since the last attempt, there is nothing we can do. */ if (!first_pass - && (!(df_get_bb_dirty (e->src)) - && !(df_get_bb_dirty (fallthru->src)))) + && !((e->src->flags & BB_MODIFIED) + || (fallthru->src->flags & BB_MODIFIED))) continue; if (try_crossjump_to_edge (mode, e, fallthru)) @@ -1906,8 +1918,8 @@ try_crossjump_bb (int mode, basic_block bb) /* If nothing changed since the last attempt, there is nothing we can do. */ if (!first_pass - && (!(df_get_bb_dirty (e->src)) - && !(df_get_bb_dirty (e2->src)))) + && !((e->src->flags & BB_MODIFIED) + || (e2->src->flags & BB_MODIFIED))) continue; if (try_crossjump_to_edge (mode, e, e2)) @@ -1926,6 +1938,299 @@ try_crossjump_bb (int mode, basic_block bb) return changed; } +/* Search the successors of BB for common insn sequences. When found, + share code between them by moving it across the basic block + boundary. Return true if any changes made. */ + +static bool +try_head_merge_bb (basic_block bb) +{ + basic_block final_dest_bb = NULL; + int max_match = INT_MAX; + edge e0; + rtx *headptr, *currptr, *nextptr; + bool changed, moveall; + unsigned ix; + rtx e0_last_head, cond, move_before; + unsigned nedges = EDGE_COUNT (bb->succs); + rtx jump = BB_END (bb); + regset live, live_union; + + /* Nothing to do if there is not at least two outgoing edges. */ + if (nedges < 2) + return false; + + /* Don't crossjump if this block ends in a computed jump, + unless we are optimizing for size. */ + if (optimize_bb_for_size_p (bb) + && bb != EXIT_BLOCK_PTR + && computed_jump_p (BB_END (bb))) + return false; + + cond = get_condition (jump, &move_before, true, false); + if (cond == NULL_RTX) + move_before = jump; + + for (ix = 0; ix < nedges; ix++) + if (EDGE_SUCC (bb, ix)->dest == EXIT_BLOCK_PTR) + return false; + + for (ix = 0; ix < nedges; ix++) + { + edge e = EDGE_SUCC (bb, ix); + basic_block other_bb = e->dest; + + if (df_get_bb_dirty (other_bb)) + { + block_was_dirty = true; + return false; + } + + if (e->flags & EDGE_ABNORMAL) + return false; + + /* Normally, all destination blocks must only be reachable from this + block, i.e. they must have one incoming edge. + + There is one special case we can handle, that of multiple consecutive + jumps where the first jumps to one of the targets of the second jump. + This happens frequently in switch statements for default labels. + The structure is as follows: + FINAL_DEST_BB + .... + if (cond) jump A; + fall through + BB + jump with targets A, B, C, D... + A + has two incoming edges, from FINAL_DEST_BB and BB + + In this case, we can try to move the insns through BB and into + FINAL_DEST_BB. */ + if (EDGE_COUNT (other_bb->preds) != 1) + { + edge incoming_edge, incoming_bb_other_edge; + edge_iterator ei; + + if (final_dest_bb != NULL + || EDGE_COUNT (other_bb->preds) != 2) + return false; + + /* We must be able to move the insns across the whole block. */ + move_before = BB_HEAD (bb); + while (!NONDEBUG_INSN_P (move_before)) + move_before = NEXT_INSN (move_before); + + if (EDGE_COUNT (bb->preds) != 1) + return false; + incoming_edge = EDGE_PRED (bb, 0); + final_dest_bb = incoming_edge->src; + if (EDGE_COUNT (final_dest_bb->succs) != 2) + return false; + FOR_EACH_EDGE (incoming_bb_other_edge, ei, final_dest_bb->succs) + if (incoming_bb_other_edge != incoming_edge) + break; + if (incoming_bb_other_edge->dest != other_bb) + return false; + } + } + + e0 = EDGE_SUCC (bb, 0); + e0_last_head = NULL_RTX; + changed = false; + + for (ix = 1; ix < nedges; ix++) + { + edge e = EDGE_SUCC (bb, ix); + rtx e0_last, e_last; + int nmatch; + + nmatch = flow_find_head_matching_sequence (e0->dest, e->dest, + &e0_last, &e_last, 0); + if (nmatch == 0) + return false; + + if (nmatch < max_match) + { + max_match = nmatch; + e0_last_head = e0_last; + } + } + + /* If we matched an entire block, we probably have to avoid moving the + last insn. */ + if (max_match > 0 + && e0_last_head == BB_END (e0->dest) + && (find_reg_note (e0_last_head, REG_EH_REGION, 0) + || control_flow_insn_p (e0_last_head))) + { + max_match--; + if (max_match == 0) + return false; + do + e0_last_head = prev_real_insn (e0_last_head); + while (DEBUG_INSN_P (e0_last_head)); + } + + if (max_match == 0) + return false; + + /* We must find a union of the live registers at each of the end points. */ + live = BITMAP_ALLOC (NULL); + live_union = BITMAP_ALLOC (NULL); + + currptr = XNEWVEC (rtx, nedges); + headptr = XNEWVEC (rtx, nedges); + nextptr = XNEWVEC (rtx, nedges); + + for (ix = 0; ix < nedges; ix++) + { + int j; + basic_block merge_bb = EDGE_SUCC (bb, ix)->dest; + rtx head = BB_HEAD (merge_bb); + + while (!NONDEBUG_INSN_P (head)) + head = NEXT_INSN (head); + headptr[ix] = head; + currptr[ix] = head; + + /* Compute the end point and live information */ + for (j = 1; j < max_match; j++) + do + head = NEXT_INSN (head); + while (!NONDEBUG_INSN_P (head)); + simulate_backwards_to_point (merge_bb, live, head); + IOR_REG_SET (live_union, live); + } + + /* If we're moving across two blocks, verify the validity of the + first move, then adjust the target and let the loop below deal + with the final move. */ + if (final_dest_bb != NULL) + { + rtx move_upto; + + moveall = can_move_insns_across (currptr[0], e0_last_head, move_before, + jump, e0->dest, live_union, + NULL, &move_upto); + if (!moveall) + { + if (move_upto == NULL_RTX) + goto out; + + while (e0_last_head != move_upto) + { + df_simulate_one_insn_backwards (e0->dest, e0_last_head, + live_union); + e0_last_head = PREV_INSN (e0_last_head); + } + } + if (e0_last_head == NULL_RTX) + goto out; + + jump = BB_END (final_dest_bb); + cond = get_condition (jump, &move_before, true, false); + if (cond == NULL_RTX) + move_before = jump; + } + + do + { + rtx move_upto; + moveall = can_move_insns_across (currptr[0], e0_last_head, + move_before, jump, e0->dest, live_union, + NULL, &move_upto); + if (!moveall && move_upto == NULL_RTX) + { + if (jump == move_before) + break; + + /* Try again, using a different insertion point. */ + move_before = jump; + +#ifdef HAVE_cc0 + /* Don't try moving before a cc0 user, as that may invalidate + the cc0. */ + if (reg_mentioned_p (cc0_rtx, jump)) + break; +#endif + + continue; + } + + if (final_dest_bb && !moveall) + /* We haven't checked whether a partial move would be OK for the first + move, so we have to fail this case. */ + break; + + changed = true; + for (;;) + { + if (currptr[0] == move_upto) + break; + for (ix = 0; ix < nedges; ix++) + { + rtx curr = currptr[ix]; + do + curr = NEXT_INSN (curr); + while (!NONDEBUG_INSN_P (curr)); + currptr[ix] = curr; + } + } + + /* If we can't currently move all of the identical insns, remember + each insn after the range that we'll merge. */ + if (!moveall) + for (ix = 0; ix < nedges; ix++) + { + rtx curr = currptr[ix]; + do + curr = NEXT_INSN (curr); + while (!NONDEBUG_INSN_P (curr)); + nextptr[ix] = curr; + } + + reorder_insns (headptr[0], currptr[0], PREV_INSN (move_before)); + df_set_bb_dirty (EDGE_SUCC (bb, 0)->dest); + if (final_dest_bb != NULL) + df_set_bb_dirty (final_dest_bb); + df_set_bb_dirty (bb); + for (ix = 1; ix < nedges; ix++) + { + df_set_bb_dirty (EDGE_SUCC (bb, ix)->dest); + delete_insn_chain (headptr[ix], currptr[ix], false); + } + if (!moveall) + { + if (jump == move_before) + break; + + /* For the unmerged insns, try a different insertion point. */ + move_before = jump; + +#ifdef HAVE_cc0 + /* Don't try moving before a cc0 user, as that may invalidate + the cc0. */ + if (reg_mentioned_p (cc0_rtx, jump)) + break; +#endif + + for (ix = 0; ix < nedges; ix++) + currptr[ix] = headptr[ix] = nextptr[ix]; + } + } + while (!moveall); + + out: + free (currptr); + free (headptr); + free (nextptr); + + crossjumps_occured |= changed; + + return changed; +} + /* Return true if BB contains just bb note, or bb note followed by only DEBUG_INSNs. */ @@ -1971,6 +2276,7 @@ try_optimize_cfg (int mode) one predecessor, they may be combined. */ do { + block_was_dirty = false; changed = false; iterations++; @@ -2170,6 +2476,13 @@ try_optimize_cfg (int mode) && try_crossjump_bb (mode, b)) changed_here = true; + if ((mode & CLEANUP_CROSSJUMP) + /* This can lengthen register lifetimes. Do it only after + reload. */ + && reload_completed + && try_head_merge_bb (b)) + changed_here = true; + /* Don't get confused by the index shift caused by deleting blocks. */ if (!changed_here) @@ -2182,6 +2495,13 @@ try_optimize_cfg (int mode) && try_crossjump_bb (mode, EXIT_BLOCK_PTR)) changed = true; + if (block_was_dirty) + { + /* This should only be set by head-merging. */ + gcc_assert (mode & CLEANUP_CROSSJUMP); + df_analyze (); + } + #ifdef ENABLE_CHECKING if (changed) verify_flow_info (); @@ -2366,8 +2686,7 @@ cleanup_cfg (int mode) if ((mode & CLEANUP_EXPENSIVE) && !reload_completed && !delete_trivially_dead_insns (get_insns (), max_reg_num ())) break; - else if ((mode & CLEANUP_CROSSJUMP) - && crossjumps_occured) + if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occured) run_fast_dce (); } else diff --git a/gcc/df-core.c b/gcc/df-core.c index 06ff854..36270bf 100644 --- a/gcc/df-core.c +++ b/gcc/df-core.c @@ -1413,6 +1413,7 @@ df_get_bb_dirty (basic_block bb) void df_set_bb_dirty (basic_block bb) { + bb->flags |= BB_MODIFIED; if (df) { int p; diff --git a/gcc/df-problems.c b/gcc/df-problems.c index 559af91..d9fbc85 100644 --- a/gcc/df-problems.c +++ b/gcc/df-problems.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "sbitmap.h" #include "bitmap.h" +#include "target.h" #include "timevar.h" #include "df.h" #include "except.h" @@ -3541,6 +3542,27 @@ df_simulate_find_defs (rtx insn, bitmap defs) } } +/* Find the set of uses for INSN. This includes partial defs. */ + +static void +df_simulate_find_uses (rtx insn, bitmap uses) +{ + df_ref *rec; + unsigned int uid = INSN_UID (insn); + + for (rec = DF_INSN_UID_DEFS (uid); *rec; rec++) + { + df_ref def = *rec; + if (DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)) + bitmap_set_bit (uses, DF_REF_REGNO (def)); + } + for (rec = DF_INSN_UID_USES (uid); *rec; rec++) + { + df_ref use = *rec; + bitmap_set_bit (uses, DF_REF_REGNO (use)); + } +} + /* Find the set of real DEFs, which are not clobbers, for INSN. */ void @@ -3768,7 +3790,303 @@ df_simulate_one_insn_forwards (basic_block bb, rtx insn, bitmap live) } df_simulate_fixup_sets (bb, live); } + +/* Used by the next two functions to encode information about the + memory references we found. */ +#define MEMREF_NORMAL 1 +#define MEMREF_VOLATILE 2 + +/* A subroutine of can_move_insns_across_p called through for_each_rtx. + Return either MEMREF_NORMAL or MEMREF_VOLATILE if a memory is found. */ + +static int +find_memory (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (GET_CODE (x) == ASM_OPERANDS && MEM_VOLATILE_P (x)) + return MEMREF_VOLATILE; + + if (!MEM_P (x)) + return 0; + if (MEM_VOLATILE_P (x)) + return MEMREF_VOLATILE; + if (MEM_READONLY_P (x)) + return 0; + + return MEMREF_NORMAL; +} + +/* A subroutine of can_move_insns_across_p called through note_stores. + DATA points to an integer in which we set either the bit for + MEMREF_NORMAL or the bit for MEMREF_VOLATILE if we find a MEM + of either kind. */ + +static void +find_memory_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + int *pflags = (int *)data; + if (GET_CODE (x) == SUBREG) + x = XEXP (x, 0); + /* Treat stores to SP as stores to memory, this will prevent problems + when there are references to the stack frame. */ + if (x == stack_pointer_rtx) + *pflags |= MEMREF_VOLATILE; + if (!MEM_P (x)) + return; + *pflags |= MEM_VOLATILE_P (x) ? MEMREF_VOLATILE : MEMREF_NORMAL; +} + +/* Scan BB backwards, using df_simulate functions to keep track of + lifetimes, up to insn POINT. The result is stored in LIVE. */ + +void +simulate_backwards_to_point (basic_block bb, regset live, rtx point) +{ + rtx insn; + bitmap_copy (live, df_get_live_out (bb)); + df_simulate_initialize_backwards (bb, live); + + /* Scan and update life information until we reach the point we're + interested in. */ + for (insn = BB_END (bb); insn != point; insn = PREV_INSN (insn)) + df_simulate_one_insn_backwards (bb, insn, live); +} + +/* Return true if it is safe to move a group of insns, described by + the range FROM to TO, backwards across another group of insns, + described by ACROSS_FROM to ACROSS_TO. It is assumed that there + are no insns between ACROSS_TO and FROM, but they may be in + different basic blocks; MERGE_BB is the block from which the + insns will be moved. The caller must pass in a regset MERGE_LIVE + which specifies the registers live after TO. + + This function may be called in one of two cases: either we try to + move identical instructions from all successor blocks into their + predecessor, or we try to move from only one successor block. If + OTHER_BRANCH_LIVE is nonnull, it indicates that we're dealing with + the second case. It should contain a set of registers live at the + end of ACROSS_TO which must not be clobbered by moving the insns. + In that case, we're also more careful about moving memory references + and trapping insns. + + We return false if it is not safe to move the entire group, but it + may still be possible to move a subgroup. PMOVE_UPTO, if nonnull, + is set to point at the last moveable insn in such a case. */ +bool +can_move_insns_across (rtx from, rtx to, rtx across_from, rtx across_to, + basic_block merge_bb, regset merge_live, + regset other_branch_live, rtx *pmove_upto) +{ + rtx insn, next, max_to; + bitmap merge_set, merge_use, local_merge_live; + bitmap test_set, test_use; + unsigned i, fail = 0; + bitmap_iterator bi; + int memrefs_in_across = 0; + int mem_sets_in_across = 0; + bool trapping_insns_in_across = false; + + if (pmove_upto != NULL) + *pmove_upto = NULL_RTX; + + /* Find real bounds, ignoring debug insns. */ + while (!NONDEBUG_INSN_P (from) && from != to) + from = NEXT_INSN (from); + while (!NONDEBUG_INSN_P (to) && from != to) + to = PREV_INSN (to); + + for (insn = across_to; ; insn = next) + { + if (NONDEBUG_INSN_P (insn)) + { + memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory, + NULL); + note_stores (PATTERN (insn), find_memory_stores, + &mem_sets_in_across); + /* This is used just to find sets of the stack pointer. */ + memrefs_in_across |= mem_sets_in_across; + trapping_insns_in_across |= may_trap_p (PATTERN (insn)); + } + next = PREV_INSN (insn); + if (insn == across_from) + break; + } + + /* Collect: + MERGE_SET = set of registers set in MERGE_BB + MERGE_USE = set of registers used in MERGE_BB and live at its top + MERGE_LIVE = set of registers live at the point inside the MERGE + range that we've reached during scanning + TEST_SET = set of registers set between ACROSS_FROM and ACROSS_END. + TEST_USE = set of registers used between ACROSS_FROM and ACROSS_END, + and live before ACROSS_FROM. */ + + merge_set = BITMAP_ALLOC (®_obstack); + merge_use = BITMAP_ALLOC (®_obstack); + local_merge_live = BITMAP_ALLOC (®_obstack); + test_set = BITMAP_ALLOC (®_obstack); + test_use = BITMAP_ALLOC (®_obstack); + + /* Compute the set of registers set and used in the ACROSS range. */ + if (other_branch_live != NULL) + bitmap_copy (test_use, other_branch_live); + df_simulate_initialize_backwards (merge_bb, test_use); + for (insn = across_to; ; insn = next) + { + if (NONDEBUG_INSN_P (insn)) + { + df_simulate_find_defs (insn, test_set); + df_simulate_defs (insn, test_use); + df_simulate_uses (insn, test_use); + } + next = PREV_INSN (insn); + if (insn == across_from) + break; + } + + /* Compute an upper bound for the amount of insns moved, by finding + the first insn in MERGE that sets a register in TEST_USE, or uses + a register in TEST_SET. We also check for calls, trapping operations, + and memory references. */ + max_to = NULL_RTX; + for (insn = from; ; insn = next) + { + if (CALL_P (insn)) + break; + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) + break; + if (NONDEBUG_INSN_P (insn)) + { + if (may_trap_p (PATTERN (insn)) + && (trapping_insns_in_across || other_branch_live != NULL)) + break; + + /* We cannot move memory stores past each other, or move memory + reads past stores, at least not without tracking them and + calling true_dependence on every pair. + + If there is no other branch and no memory references or + sets in the ACROSS range, we can move memory references + freely, even volatile ones. + + Otherwise, the rules are as follows: volatile memory + references and stores can't be moved at all, and any type + of memory reference can't be moved if there are volatile + accesses or stores in the ACROSS range. That leaves + normal reads, which can be moved, as the trapping case is + dealt with elsewhere. */ + if (other_branch_live != NULL || memrefs_in_across != 0) + { + int mem_ref_flags = 0; + int mem_set_flags = 0; + note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags); + mem_ref_flags = for_each_rtx (&PATTERN (insn), find_memory, + NULL); + /* Catch sets of the stack pointer. */ + mem_ref_flags |= mem_set_flags; + + if ((mem_ref_flags | mem_set_flags) & MEMREF_VOLATILE) + break; + if ((memrefs_in_across & MEMREF_VOLATILE) && mem_ref_flags != 0) + break; + if (mem_set_flags != 0 + || (mem_sets_in_across != 0 && mem_ref_flags != 0)) + break; + } + df_simulate_find_uses (insn, merge_use); + /* We're only interested in uses which use a value live at + the top, not one previously set in this block. */ + bitmap_and_compl_into (merge_use, merge_set); + df_simulate_find_defs (insn, merge_set); + if (bitmap_intersect_p (merge_set, test_use) + || bitmap_intersect_p (merge_use, test_set)) + break; + max_to = insn; + } + next = NEXT_INSN (insn); + if (insn == to) + break; + } + if (max_to != to) + fail = 1; + + if (max_to == NULL_RTX || (fail && pmove_upto == NULL)) + goto out; + + /* Now, lower this upper bound by also taking into account that + a range of insns moved across ACROSS must not leave a register + live at the end that will be clobbered in ACROSS. We need to + find a point where TEST_SET & LIVE == 0. + + Insns in the MERGE range that set registers which are also set + in the ACROSS range may still be moved as long as we also move + later insns which use the results of the set, and make the + register dead again. This is verified by the condition stated + above. We only need to test it for registers that are set in + the moved region. + + MERGE_LIVE is provided by the caller and holds live registers after + TO. */ + bitmap_copy (local_merge_live, merge_live); + for (insn = to; insn != max_to; insn = PREV_INSN (insn)) + df_simulate_one_insn_backwards (merge_bb, insn, local_merge_live); + + /* We're not interested in registers that aren't set in the moved + region at all. */ + bitmap_and_into (local_merge_live, merge_set); + for (;;) + { + if (NONDEBUG_INSN_P (insn)) + { + if (!bitmap_intersect_p (test_set, local_merge_live)) + { + max_to = insn; + break; + } + + df_simulate_one_insn_backwards (merge_bb, insn, + local_merge_live); + } + if (insn == from) + { + fail = 1; + goto out; + } + insn = PREV_INSN (insn); + } + + if (max_to != to) + fail = 1; + + if (pmove_upto) + *pmove_upto = max_to; + + /* For small register class machines, don't lengthen lifetimes of + hard registers before reload. */ + if (! reload_completed + && targetm.small_register_classes_for_mode_p (VOIDmode)) + { + EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi) + { + if (i < FIRST_PSEUDO_REGISTER + && ! fixed_regs[i] + && ! global_regs[i]) + fail = 1; + } + } + + out: + BITMAP_FREE (merge_set); + BITMAP_FREE (merge_use); + BITMAP_FREE (local_merge_live); + BITMAP_FREE (test_set); + BITMAP_FREE (test_use); + + return !fail; +} /*---------------------------------------------------------------------------- @@ -970,7 +970,9 @@ extern void df_simulate_one_insn_backwards (basic_block, rtx, bitmap); extern void df_simulate_finalize_backwards (basic_block, bitmap); extern void df_simulate_initialize_forwards (basic_block, bitmap); extern void df_simulate_one_insn_forwards (basic_block, rtx, bitmap); - +extern void simulate_backwards_to_point (basic_block, regset, rtx); +extern bool can_move_insns_across (rtx, rtx, rtx, rtx, basic_block, regset, + regset, rtx *); /* Functions defined in df-scan.c. */ extern void df_scan_alloc (bitmap); diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index e5a8835..eee5cc7 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -102,7 +102,6 @@ static int noce_find_if_block (basic_block, edge, edge, int); static int cond_exec_find_if_block (ce_if_block_t *); static int find_if_case_1 (basic_block, edge, edge); static int find_if_case_2 (basic_block, edge, edge); -static int find_memory (rtx *, void *); static int dead_or_predicable (basic_block, basic_block, basic_block, basic_block, int); static void noce_emit_move_insn (rtx, rtx); @@ -3975,15 +3974,6 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) return TRUE; } -/* A subroutine of dead_or_predicable called through for_each_rtx. - Return 1 if a memory is found. */ - -static int -find_memory (rtx *px, void *data ATTRIBUTE_UNUSED) -{ - return MEM_P (*px); -} - /* Used by the code above to perform the actual rtl transformations. Return TRUE if successful. @@ -3997,7 +3987,7 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, basic_block other_bb, basic_block new_dest, int reversep) { rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX; - bitmap merge_set = NULL, merge_set_noclobber = NULL; + bitmap merge_set = NULL; /* Number of pending changes. */ int n_validated_changes = 0; @@ -4087,129 +4077,46 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, } #endif + /* If we allocated new pseudos (e.g. in the conditional move + expander called from noce_emit_cmove), we must resize the + array first. */ + if (max_regno < max_reg_num ()) + max_regno = max_reg_num (); + /* Try the NCE path if the CE path did not result in any changes. */ if (n_validated_changes == 0) { + rtx cond, insn; + regset live; + bool success; + /* In the non-conditional execution case, we have to verify that there are no trapping operations, no calls, no references to memory, and that any registers modified are dead at the branch site. */ - rtx insn, cond, prev; - bitmap test_live, test_set; - bool intersect = false; - - /* Check for no calls or trapping operations. */ - for (insn = head; ; insn = NEXT_INSN (insn)) - { - if (CALL_P (insn)) - return FALSE; - if (NONDEBUG_INSN_P (insn)) - { - if (may_trap_p (PATTERN (insn))) - return FALSE; - - /* ??? Even non-trapping memories such as stack frame - references must be avoided. For stores, we collect - no lifetime info; for reads, we'd have to assert - true_dependence false against every store in the - TEST range. */ - if (for_each_rtx (&PATTERN (insn), find_memory, NULL)) - return FALSE; - } - if (insn == end) - break; - } - - if (! any_condjump_p (jump)) + if (!any_condjump_p (jump)) return FALSE; /* Find the extent of the conditional. */ cond = noce_get_condition (jump, &earliest, false); - if (! cond) + if (!cond) return FALSE; - /* Collect: - MERGE_SET = set of registers set in MERGE_BB - MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers - that are really set, not just clobbered. - TEST_LIVE = set of registers live at EARLIEST - TEST_SET = set of registers set between EARLIEST and the - end of the block. */ + live = BITMAP_ALLOC (®_obstack); + simulate_backwards_to_point (merge_bb, live, end); + success = can_move_insns_across (head, end, earliest, jump, + merge_bb, live, + df_get_live_in (other_bb), NULL); + BITMAP_FREE (live); + if (!success) + return FALSE; + /* Collect the set of registers set in MERGE_BB. */ merge_set = BITMAP_ALLOC (®_obstack); - merge_set_noclobber = BITMAP_ALLOC (®_obstack); - - /* If we allocated new pseudos (e.g. in the conditional move - expander called from noce_emit_cmove), we must resize the - array first. */ - if (max_regno < max_reg_num ()) - max_regno = max_reg_num (); FOR_BB_INSNS (merge_bb, insn) - { - if (NONDEBUG_INSN_P (insn)) - { - df_simulate_find_defs (insn, merge_set); - df_simulate_find_noclobber_defs (insn, merge_set_noclobber); - } - } - - /* For small register class machines, don't lengthen lifetimes of - hard registers before reload. */ - if (! reload_completed - && targetm.small_register_classes_for_mode_p (VOIDmode)) - { - unsigned i; - bitmap_iterator bi; - - EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi) - { - if (i < FIRST_PSEUDO_REGISTER - && ! fixed_regs[i] - && ! global_regs[i]) - goto fail; - } - } - - /* For TEST, we're interested in a range of insns, not a whole block. - Moreover, we're interested in the insns live from OTHER_BB. */ - test_live = BITMAP_ALLOC (®_obstack); - test_set = BITMAP_ALLOC (®_obstack); - - /* The loop below takes the set of live registers - after JUMP, and calculates the live set before EARLIEST. */ - bitmap_copy (test_live, df_get_live_in (other_bb)); - df_simulate_initialize_backwards (test_bb, test_live); - for (insn = jump; ; insn = prev) - { - if (INSN_P (insn)) - { - df_simulate_find_defs (insn, test_set); - df_simulate_one_insn_backwards (test_bb, insn, test_live); - } - prev = PREV_INSN (insn); - if (insn == earliest) - break; - } - - /* We can perform the transformation if - MERGE_SET_NOCLOBBER & TEST_SET - and - MERGE_SET & TEST_LIVE - and - TEST_SET & DF_LIVE_IN (merge_bb) - are empty. */ - - if (bitmap_intersect_p (merge_set_noclobber, test_set) - || bitmap_intersect_p (merge_set, test_live) - || bitmap_intersect_p (test_set, df_get_live_in (merge_bb))) - intersect = true; - - BITMAP_FREE (test_live); - BITMAP_FREE (test_set); - - if (intersect) - goto fail; + if (NONDEBUG_INSN_P (insn)) + df_simulate_find_defs (insn, merge_set); } no_body: @@ -4288,7 +4195,6 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, remove_reg_equal_equiv_notes_for_regno (i); BITMAP_FREE (merge_set); - BITMAP_FREE (merge_set_noclobber); } reorder_insns (head, end, PREV_INSN (earliest)); @@ -4307,12 +4213,10 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, cancel: cancel_changes (0); - fail: + if (merge_set) - { - BITMAP_FREE (merge_set); - BITMAP_FREE (merge_set_noclobber); - } + BITMAP_FREE (merge_set); + return FALSE; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a9ed2f8..8c4d838 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2010-12-14 Bernd Schmidt <bernds@codesourcery.com> + + PR rtl-optimization/44374 + Reapply patch with fixes. + * gcc.target/arm/headmerge-1.c: New test. + * gcc.target/arm/headmerge-2.c: New test. + * gcc.target/i386/headmerge-1.c: New test. + * gcc.target/i386/headmerge-2.c: New test. + 2010-12-13 Jason Merrill <jason@redhat.com> PR c++/46873 diff --git a/gcc/testsuite/gcc.target/arm/headmerge-1.c b/gcc/testsuite/gcc.target/arm/headmerge-1.c new file mode 100644 index 0000000..218c6a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/headmerge-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "#120" 1 } } */ + +extern void foo1 (int); +extern void foo2 (int); + +void t (int x, int y) +{ + if (y < 5) + foo1 (120); + else + foo2 (120); +} diff --git a/gcc/testsuite/gcc.target/arm/headmerge-2.c b/gcc/testsuite/gcc.target/arm/headmerge-2.c new file mode 100644 index 0000000..36637a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/headmerge-2.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "120" 1 } } */ + +extern void foo1 (int); +extern void foo2 (int); +extern void foo3 (int); +extern void foo4 (int); +extern void foo5 (int); +extern void foo6 (int); + +void t (int x, int y) +{ + switch (y) + { + case 1: + foo1 (120); + break; + case 5: + foo2 (120); + break; + case 7: + foo3 (120); + break; + case 10: + foo4 (120); + break; + case 13: + foo5 (120); + break; + default: + foo6 (120); + break; + } +} diff --git a/gcc/testsuite/gcc.target/i386/headmerge-1.c b/gcc/testsuite/gcc.target/i386/headmerge-1.c new file mode 100644 index 0000000..941028c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/headmerge-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "120" 1 } } */ + +extern void foo1 (int); +extern void foo2 (int); + +void t (int x, int y) +{ + if (y < 5) + foo1 (120); + else + foo2 (120); +} diff --git a/gcc/testsuite/gcc.target/i386/headmerge-2.c b/gcc/testsuite/gcc.target/i386/headmerge-2.c new file mode 100644 index 0000000..36637a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/headmerge-2.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "120" 1 } } */ + +extern void foo1 (int); +extern void foo2 (int); +extern void foo3 (int); +extern void foo4 (int); +extern void foo5 (int); +extern void foo6 (int); + +void t (int x, int y) +{ + switch (y) + { + case 1: + foo1 (120); + break; + case 5: + foo2 (120); + break; + case 7: + foo3 (120); + break; + case 10: + foo4 (120); + break; + case 13: + foo5 (120); + break; + default: + foo6 (120); + break; + } +} |