author | Eric Botcazou <ebotcazou@gcc.gnu.org> | 2017-11-23 16:36:28 +0000 |
---|---|---|
committer | Eric Botcazou <ebotcazou@gcc.gnu.org> | 2017-11-23 16:36:28 +0000 |
commit | ac9effeda3bb29c0adcd8834b45b6e5613413049 (patch) | |
tree | 3f5eec951b1bf6f87260a9f9b1ac709979be8501 /gcc/tree-ssa-loop-ivcanon.c | |
parent | 02a703675c455492a0606579a37d373e9777284c (diff) | |
generic.texi (ANNOTATE_EXPR): Document 3rd operand.
* doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand.
* cfgloop.h (struct loop): Add unroll field.
* function.h (struct function): Add has_unroll bitfield.
* gimplify.c (gimple_boolify) <ANNOTATE_EXPR>: Deal with unroll kind.
(gimplify_expr) <ANNOTATE_EXPR>: Propagate 3rd operand.
* loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll.
(pass_rtl_unroll_loops::gate): Likewise.
* loop-unroll.c (decide_unrolling): Tweak note message. Skip loops
for which loop->unroll==1.
(decide_unroll_constant_iterations): Use note for consistency and
take loop->unroll into account. Return early if loop->unroll is set.
Fix thinko in existing test.
(decide_unroll_runtime_iterations): Use note for consistency and
take loop->unroll into account.
(decide_unroll_stupid): Likewise.
* lto-streamer-in.c (input_cfg): Read loop->unroll.
* lto-streamer-out.c (output_cfg): Write loop->unroll.
* tree-cfg.c (replace_loop_annotate_in_block) <annot_expr_unroll_kind>:
New case.
(replace_loop_annotate) <annot_expr_unroll_kind>: Likewise.
(print_loop): Print loop->unroll if set.
* tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind.
* tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll.
* tree-pretty-print.c (dump_generic_node) <annot_expr_unroll_kind>:
New case.
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if
loop->unroll is set and smaller than the trip count. Otherwise bypass
entirely the heuristics if loop->unroll is set. Remove dead note.
Fix off-by-one bug in other note.
(try_peel_loop): Bail out if loop->unroll is set. Fix formatting.
(tree_unroll_loops_completely_1): Force unrolling if loop->unroll
is greater than 1.
(tree_unroll_loops_completely): Make static.
(pass_complete_unroll::execute): Use correct type for variable.
(pass_complete_unrolli::execute): Fix formatting.
* tree.def (ANNOTATE_EXPR): Add 3rd operand.
ada/
* gcc-interface/trans.c (gnat_gimplify_stmt) <LOOP_STMT>: Pass 3rd
operand to ANNOTATE_EXPR and also pass unrolling hints.
c/
* c-parser.c (c_parser_while_statement): Pass 3rd operand to
ANNOTATE_EXPR.
(c_parser_do_statement): Likewise.
(c_parser_for_statement): Likewise.
cp/
* pt.c (tsubst_expr) <ANNOTATE_EXPR>: Recurse on 3rd operand.
* semantics.c (finish_while_stmt_cond): Pass 3rd operand to
ANNOTATE_EXPR.
(finish_do_stmt): Likewise.
(finish_for_cond): Likewise.
fortran/
* trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to
ANNOTATE_EXPR.
From-SVN: r255106
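For orientation, here is a small user-level sketch of what this infrastructure enables: a per-loop unroll request that the front end lowers into the new third operand of ANNOTATE_EXPR (annot_expr_unroll_kind), which gimplification then records in loop->unroll and cfun->has_unroll. The pragma spelling below is an assumption for illustration; the c-family pragma handling is not part of this ChangeLog, although GCC releases that ship this infrastructure spell the hint "#pragma GCC unroll N".

/* Hypothetical illustration, not part of this commit's diff: a loop
   carrying an unroll hint.  The front end is assumed to wrap the loop
   condition in ANNOTATE_EXPR (cond, annot_expr_unroll_kind, 4);
   gimplify_expr then propagates the factor so that the corresponding
   loop ends up with loop->unroll == 4.  */

extern void use (int);

void
walk (const int *a, int n)
{
  /* Assumed user-level spelling of the hint (provided by the companion
     c-family change, not by this ChangeLog).  */
#pragma GCC unroll 4
  for (int i = 0; i < n; i++)
    use (a[i]);
}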
Diffstat (limited to 'gcc/tree-ssa-loop-ivcanon.c')
-rw-r--r-- | gcc/tree-ssa-loop-ivcanon.c | 278 |
1 file changed, 152 insertions, 126 deletions
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 8b1daa6..a32e12b 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -681,11 +681,9 @@ try_unroll_loop_completely (struct loop *loop,
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
-  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 
   /* See if we proved number of iterations to be low constant.
@@ -726,7 +724,8 @@ try_unroll_loop_completely (struct loop *loop,
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (!loop->unroll
+      && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
	 fprintf (dump_file, "Not unrolling loop %d "
@@ -740,121 +739,137 @@ try_unroll_loop_completely (struct loop *loop,
 
   if (n_unroll)
     {
-      bool large;
       if (ul == UL_SINGLE_ITER)
	 return false;
 
-      /* EXIT can be removed only if we are sure it passes first N_UNROLL
-	 iterations.  */
-      bool remove_exit = (exit && niter
-			  && TREE_CODE (niter) == INTEGER_CST
-			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
-
-      large = tree_estimate_loop_size
-	(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
-	 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
	 {
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If the unrolling factor is too large, bail out.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+			 "Not unrolling loop %d: "
+			 "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
	 }
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
	 {
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
+	     iterations.  */
+	  bool remove_exit = (exit && niter
+			      && TREE_CODE (niter) == INTEGER_CST
+			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
+	  bool large
+	    = tree_estimate_loop_size
+		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complete unrolling is a major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
	   if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otheriwse.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of branches on hot path in the unrolled "
+			 "sequence reaches --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of insns in the unrolled sequence reaches "
+			 "--param max-completely-peeled-insns limit.\n",
+			 loop->num);
+	      return false;
+	    }
	 }
 
-      if (!n_unroll)
-	dump_printf_loc (report_flags, locus,
-			 "loop turned into non-loop; it never loops.\n");
-
       initialize_original_copy_tables ();
       auto_sbitmap wont_exit (n_unroll + 1);
@@ -898,8 +913,8 @@ try_unroll_loop_completely (struct loop *loop,
       else
	 gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path.  Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path, as doing so may remove outer loop and
+	 confuse bookkeeping code in tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -915,7 +930,7 @@ try_unroll_loop_completely (struct loop *loop,
     {
       dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
		        "loop with %d iterations completely unrolled",
-		       (int) (n_unroll + 1));
+		       (int) n_unroll);
       if (loop->header->count.initialized_p ())
	 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
		      " (header execution count %d)",
@@ -963,7 +978,8 @@ try_peel_loop (struct loop *loop,
   struct loop_size size;
   int peeled_size;
 
-  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+  if (!flag_peel_loops
+      || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
       || !peeled_loops)
     return false;
 
@@ -974,20 +990,29 @@ try_peel_loop (struct loop *loop,
       return false;
     }
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.
      While the code is perfectly capable of peeling non-innermost loops,
      the heuristics would probably need some improvements.  */
   if (loop->inner)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
@@ -1005,7 +1030,7 @@ try_peel_loop (struct loop *loop,
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
		  "unroll completely\n");
       return false;
     }
@@ -1016,7 +1041,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
		  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
       return false;
     }
@@ -1029,7 +1054,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		  "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1317,7 +1342,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
	  not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1345,7 +1372,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
 
-unsigned int
+static unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
   bitmap father_bbs = BITMAP_ALLOC (NULL);
@@ -1522,9 +1549,9 @@ pass_complete_unroll::execute (function *fun)
      re-peeling the same loop multiple times.  */
   if (flag_peel_loops)
     peeled_loops = BITMAP_ALLOC (NULL);
-  int val = tree_unroll_loops_completely (flag_unroll_loops
-					  || flag_peel_loops
-					  || optimize >= 3, true);
+  unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+						   || flag_peel_loops
+						   || optimize >= 3, true);
   if (peeled_loops)
     {
       BITMAP_FREE (peeled_loops);
@@ -1576,8 +1603,7 @@ pass_complete_unrolli::execute (function *fun)
 {
   unsigned ret = 0;
 
-  loop_optimizer_init (LOOPS_NORMAL
-		       | LOOPS_HAVE_RECORDED_EXITS);
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
   if (number_of_loops (fun) > 1)
     {
       scev_initialize ();
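A hedged sketch of how the new behavior in try_unroll_loop_completely and tree_unroll_loops_completely_1 can be observed: with loop->unroll set, the GIMPLE complete unroller forces UL_ALL and skips the size heuristics, but it now declines to unroll completely when the requested factor is smaller than the known trip count (the request is then left to the RTL unroller, whose gate now also checks cfun->has_unroll). The pragma spelling and the dump option named below are assumptions about the surrounding tooling, not part of this diff.

/* Hypothetical testcase; compiling with something like
   -O2 -fdump-tree-cunroll-details (an assumed dump name for the
   complete-unrolling pass) should show the messages added above.  */

extern int sink;

void
partial_request (void)
{
  /* Known trip count 8, requested factor 2: try_unroll_loop_completely
     is expected to bail out with "user didn't want it unrolled
     completely" instead of unrolling past the request.  */
#pragma GCC unroll 2
  for (int i = 0; i < 8; i++)
    sink += i;
}

void
full_request (void)
{
  /* Factor covering the whole trip count: complete unrolling proceeds
     and the size heuristics (max-completely-peeled-insns and friends)
     are bypassed.  */
#pragma GCC unroll 8
  for (int i = 0; i < 8; i++)
    sink += i;
}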