author | Eric Botcazou <ebotcazou@gcc.gnu.org> | 2017-11-23 16:36:28 +0000 |
---|---|---|
committer | Eric Botcazou <ebotcazou@gcc.gnu.org> | 2017-11-23 16:36:28 +0000 |
commit | ac9effeda3bb29c0adcd8834b45b6e5613413049 (patch) | |
tree | 3f5eec951b1bf6f87260a9f9b1ac709979be8501 /gcc/tree-ssa-loop-ivcanon.c | |
parent | 02a703675c455492a0606579a37d373e9777284c (diff) | |
generic.texi (ANNOTATE_EXPR): Document 3rd operand.
* doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand.
* cfgloop.h (struct loop): Add unroll field.
* function.h (struct function): Add has_unroll bitfield.
* gimplify.c (gimple_boolify) <ANNOTATE_EXPR>: Deal with unroll kind.
(gimplify_expr) <ANNOTATE_EXPR>: Propagate 3rd operand.
* loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll.
(pass_rtl_unroll_loops::gate): Likewise.
* loop-unroll.c (decide_unrolling): Tweak note message. Skip loops
for which loop->unroll==1.
(decide_unroll_constant_iterations): Use note for consistency and
take loop->unroll into account. Return early if loop->unroll is set.
Fix thinko in existing test.
(decide_unroll_runtime_iterations): Use note for consistency and
take loop->unroll into account.
(decide_unroll_stupid): Likewise.
* lto-streamer-in.c (input_cfg): Read loop->unroll.
* lto-streamer-out.c (output_cfg): Write loop->unroll.
* tree-cfg.c (replace_loop_annotate_in_block) <annot_expr_unroll_kind>:
New case.
(replace_loop_annotate) <annot_expr_unroll_kind>: Likewise.
(print_loop): Print loop->unroll if set.
* tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind.
* tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll.
* tree-pretty-print.c (dump_generic_node) <annot_expr_unroll_kind>:
New case.
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if
loop->unroll is set and smaller than the trip count. Otherwise bypass
entirely the heuristics if loop->unroll is set. Remove dead note.
Fix off-by-one bug in other note.
(try_peel_loop): Bail out if loop->unroll is set. Fix formatting.
(tree_unroll_loops_completely_1): Force unrolling if loop->unroll
is greater than 1.
(tree_unroll_loops_completely): Make static.
(pass_complete_unroll::execute): Use correct type for variable.
(pass_complete_unrolli::execute): Fix formatting.
* tree.def (ANNOTATE_EXPR): Add 3rd operand.
ada/
* gcc-interface/trans.c (gnat_gimplify_stmt) <LOOP_STMT>: Pass 3rd
operand to ANNOTATE_EXPR and also pass unrolling hints.
c/
* c-parser.c (c_parser_while_statement): Pass 3rd operand to
ANNOTATE_EXPR.
(c_parser_do_statement): Likewise.
(c_parser_for_statement): Likewise.
cp/
* pt.c (tsubst_expr) <ANNOTATE_EXPR>: Recurse on 3rd operand.
* semantics.c (finish_while_stmt_cond): Pass 3rd operand to
ANNOTATE_EXPR.
(finish_do_stmt): Likewise.
(finish_for_cond): Likewise.
fortran/
* trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to
ANNOTATE_EXPR.
From-SVN: r255106
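For orientation, here is a small user-level sketch of what this infrastructure enables: a per-loop unroll request that the front end lowers into the new third operand of ANNOTATE_EXPR (annot_expr_unroll_kind), which gimplification then records in loop->unroll and cfun->has_unroll. The pragma spelling below is an assumption for illustration; the c-family pragma handling is not part of this ChangeLog, although GCC releases that ship this infrastructure spell the hint "#pragma GCC unroll N".

/* Hypothetical illustration, not part of this commit's diff: a loop
   carrying an unroll hint.  The front end is assumed to wrap the loop
   condition in ANNOTATE_EXPR (cond, annot_expr_unroll_kind, 4);
   gimplify_expr then propagates the factor so that the corresponding
   loop ends up with loop->unroll == 4.  */

extern void use (int);

void
walk (const int *a, int n)
{
  /* Assumed user-level spelling of the hint (provided by the companion
     c-family change, not by this ChangeLog).  */
#pragma GCC unroll 4
  for (int i = 0; i < n; i++)
    use (a[i]);
}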
Diffstat (limited to 'gcc/tree-ssa-loop-ivcanon.c')
-rw-r--r-- | gcc/tree-ssa-loop-ivcanon.c | 278 |
1 file changed, 152 insertions, 126 deletions
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 8b1daa6..a32e12b 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -681,11 +681,9 @@ try_unroll_loop_completely (struct loop *loop,
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
-  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 
   /* See if we proved number of iterations to be low constant.
@@ -726,7 +724,8 @@ try_unroll_loop_completely (struct loop *loop,
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (!loop->unroll
+      && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
	 fprintf (dump_file, "Not unrolling loop %d "
@@ -740,121 +739,137 @@ try_unroll_loop_completely (struct loop *loop,
 
   if (n_unroll)
     {
-      bool large;
       if (ul == UL_SINGLE_ITER)
	 return false;
 
-      /* EXIT can be removed only if we are sure it passes first N_UNROLL
-	 iterations.  */
-      bool remove_exit = (exit && niter
-			  && TREE_CODE (niter) == INTEGER_CST
-			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
-
-      large = tree_estimate_loop_size
-	(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
-	 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
	 {
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If the unrolling factor is too large, bail out.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+			 "Not unrolling loop %d: "
+			 "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
	 }
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
	 {
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  /* EXIT can be removed only if we are sure it passes first N_UNROLL
+	     iterations.  */
+	  bool remove_exit = (exit && niter
+			      && TREE_CODE (niter) == INTEGER_CST
+			      && wi::leu_p (n_unroll, wi::to_widest (niter)));
+	  bool large
+	    = tree_estimate_loop_size
+		(loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+		 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complete unrolling is a major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
	   if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otheriwse.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of branches on hot path in the unrolled "
+			 "sequence reaches --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "number of insns in the unrolled sequence reaches "
+			 "--param max-completely-peeled-insns limit.\n",
+			 loop->num);
+	      return false;
+	    }
	 }
 
-      if (!n_unroll)
-	dump_printf_loc (report_flags, locus,
-			 "loop turned into non-loop; it never loops.\n");
-
       initialize_original_copy_tables ();
       auto_sbitmap wont_exit (n_unroll + 1);
@@ -898,8 +913,8 @@ try_unroll_loop_completely (struct loop *loop,
       else
	 gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path.  Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path, as doing so may remove outer loop and
+	 confuse bookkeeping code in tree_unroll_loops_completely.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -915,7 +930,7 @@ try_unroll_loop_completely (struct loop *loop,
     {
       dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
		        "loop with %d iterations completely unrolled",
-		       (int) (n_unroll + 1));
+		       (int) n_unroll);
       if (loop->header->count.initialized_p ())
	 dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
		      " (header execution count %d)",
@@ -963,7 +978,8 @@ try_peel_loop (struct loop *loop,
   struct loop_size size;
   int peeled_size;
 
-  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+  if (!flag_peel_loops
+      || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
       || !peeled_loops)
     return false;
 
@@ -974,20 +990,29 @@ try_peel_loop (struct loop *loop,
       return false;
     }
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+	fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.
      While the code is perfectly capable of peeling non-innermost loops,
      the heuristics would probably need some improvements.  */
   if (loop->inner)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
@@ -1005,7 +1030,7 @@ try_peel_loop (struct loop *loop,
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
		  "unroll completely\n");
       return false;
     }
@@ -1016,7 +1041,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
		  "(%i + 1 > --param max-peel-times)\n", (int) npeel);
       return false;
     }
@@ -1029,7 +1054,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
		  "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1317,7 +1342,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
	  not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1345,7 +1372,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer,
    MAY_INCREASE_SIZE is true, perform the unrolling only if the
    size of the code does not increase.  */
 
-unsigned int
+static unsigned int
 tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
 {
   bitmap father_bbs = BITMAP_ALLOC (NULL);
@@ -1522,9 +1549,9 @@ pass_complete_unroll::execute (function *fun)
      re-peeling the same loop multiple times.  */
   if (flag_peel_loops)
     peeled_loops = BITMAP_ALLOC (NULL);
-  int val = tree_unroll_loops_completely (flag_unroll_loops
-					  || flag_peel_loops
-					  || optimize >= 3, true);
+  unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+						   || flag_peel_loops
+						   || optimize >= 3, true);
   if (peeled_loops)
     {
       BITMAP_FREE (peeled_loops);
@@ -1576,8 +1603,7 @@ pass_complete_unrolli::execute (function *fun)
 {
   unsigned ret = 0;
 
-  loop_optimizer_init (LOOPS_NORMAL
-		       | LOOPS_HAVE_RECORDED_EXITS);
+  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
   if (number_of_loops (fun) > 1)
     {
       scev_initialize ();
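A hedged sketch of how the new behavior in try_unroll_loop_completely and tree_unroll_loops_completely_1 can be observed: with loop->unroll set, the GIMPLE complete unroller forces UL_ALL and skips the size heuristics, but it now declines to unroll completely when the requested factor is smaller than the known trip count (the request is then left to the RTL unroller, whose gate now also checks cfun->has_unroll). The pragma spelling and the dump option named below are assumptions about the surrounding tooling, not part of this diff.

/* Hypothetical testcase; compiling with something like
   -O2 -fdump-tree-cunroll-details (an assumed dump name for the
   complete-unrolling pass) should show the messages added above.  */

extern int sink;

void
partial_request (void)
{
  /* Known trip count 8, requested factor 2: try_unroll_loop_completely
     is expected to bail out with "user didn't want it unrolled
     completely" instead of unrolling past the request.  */
#pragma GCC unroll 2
  for (int i = 0; i < 8; i++)
    sink += i;
}

void
full_request (void)
{
  /* Factor covering the whole trip count: complete unrolling proceeds
     and the size heuristics (max-completely-peeled-insns and friends)
     are bypassed.  */
#pragma GCC unroll 8
  for (int i = 0; i < 8; i++)
    sink += i;
}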