predict.h (force_edge_cold): Declare.

* predict.h (force_edge_cold): Declare. * predict.c (force_edge_cold): New function. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile updating. (canonicalize_loop_induction_variables): Fix formating. * gcc.dg/tree-ssa/cunroll-12.c: New testcase. * gcc.dg/tree-ssa/cunroll-13.c: New testcase. * gcc.dg/tree-ssa/cunroll-14.c: New testcase. From-SVN: r236874
author: Jan Hubicka <hubicka@ucw.cz> 2016-05-30 12:40:33 +0200
committer: Jan Hubicka <hubicka@gcc.gnu.org> 2016-05-30 10:40:33 +0000
commit: 98bdbb39a6676776c528f3b51ce740669c06d708 (patch)
tree: 70d2283e17b094627b83cbfbd240e80625f370dd /gcc
parent: d5cf3d8da6ef568929fc8bdfc1809593ff1988a8 (diff)
download: gcc-98bdbb39a6676776c528f3b51ce740669c06d708.zip
gcc-98bdbb39a6676776c528f3b51ce740669c06d708.tar.gz
gcc-98bdbb39a6676776c528f3b51ce740669c06d708.tar.bz2
8 files changed, 176 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6488753..815a43d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-05-30  Jan Hubicka  <hubicka@ucw.cz>
+
+	* predict.h (force_edge_cold): Declare.
+	* predict.c (force_edge_cold): New function.
+	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile
+	updating.
+	(canonicalize_loop_induction_variables): Fix formating.
+
 2016-05-30  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* config/visium/visium.c (visium_split_double_add): Minor tweaks.
diff --git a/gcc/predict.c b/gcc/predict.c
index 31c5565..396e150 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -3249,3 +3249,99 @@ report_predictor_hitrates (void)
   loop_optimizer_finalize ();
 }
 
+/* Force edge E to be cold.
+   If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
+   keep low probability to represent possible error in a guess.  This is used
+   i.e. in case we predict loop to likely iterate given number of times but
+   we are not 100% sure.
+
+   This function locally updates profile without attempt to keep global
+   consistency which can not be reached in full generality without full profile
+   rebuild from probabilities alone.  Doing so is not necessarily a good idea
+   because frequencies and counts may be more realistic then probabilities.
+
+   In some cases (such as for elimination of early exits during full loop
+   unrolling) the caller can ensure that profile will get consistent
+   afterwards.  */
+
+void
+force_edge_cold (edge e, bool impossible)
+{
+  gcov_type count_sum = 0;
+  int prob_sum = 0;
+  edge_iterator ei;
+  edge e2;
+  gcov_type old_count = e->count;
+  int old_probability = e->probability;
+  gcov_type gcov_scale = REG_BR_PROB_BASE;
+  int prob_scale = REG_BR_PROB_BASE;
+
+  /* If edge is already improbably or cold, just return.  */
+  if (e->probability <= impossible ? PROB_VERY_UNLIKELY : 0
+      && (!impossible || !e->count))
+    return;
+  FOR_EACH_EDGE (e2, ei, e->src->succs)
+    if (e2 != e)
+      {
+	count_sum += e2->count;
+	prob_sum += e2->probability;
+      }
+
+  /* If there are other edges out of e->src, redistribute probabilitity
+     there.  */
+  if (prob_sum)
+    {
+      e->probability
+	 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
+      if (old_probability)
+	e->count = RDIV (e->count * e->probability, old_probability);
+      else
+        e->count = MIN (e->count, impossible ? 0 : 1);
+
+      if (count_sum)
+	gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
+			   count_sum);
+      prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
+			 prob_sum);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "Making edge %i->%i %s by redistributing "
+		 "probability to other edges.\n",
+		 e->src->index, e->dest->index,
+		 impossible ? "imposisble" : "cold");
+      FOR_EACH_EDGE (e2, ei, e->src->succs)
+	if (e2 != e)
+	  {
+	    e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
+	    e2->probability = RDIV (e2->probability * prob_scale,
+				    REG_BR_PROB_BASE);
+	  }
+    }
+  /* If all edges out of e->src are unlikely, the basic block itself
+     is unlikely.  */
+  else
+    {
+      e->probability = REG_BR_PROB_BASE;
+
+      /* If we did not adjusting, the source basic block has no likely edeges
+ 	 leaving other direction. In that case force that bb cold, too.
+	 This in general is difficult task to do, but handle special case when
+	 BB has only one predecestor.  This is common case when we are updating
+	 after loop transforms.  */
+      if (!prob_sum && !count_sum && single_pred_p (e->src)
+	  && e->src->frequency > (impossible ? 0 : 1))
+	{
+	  int old_frequency = e->src->frequency;
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
+		     impossible ? "imposisble" : "cold");
+	  e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
+	  e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
+					   old_frequency);
+	  force_edge_cold (single_pred_edge (e->src), impossible);
+	}
+      else if (dump_file && (dump_flags & TDF_DETAILS)
+	       && maybe_hot_bb_p (cfun, e->src))
+	fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
+		 impossible ? "imposisble" : "cold");
+    }
+}
diff --git a/gcc/predict.h b/gcc/predict.h
index 86cb3bd..7994287 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -91,5 +91,6 @@ extern tree build_predict_expr (enum br_predictor, enum prediction);
 extern const char *predictor_name (enum br_predictor);
 extern void rebuild_frequencies (void);
 extern void report_predictor_hitrates (void);
+extern void force_edge_cold (edge, bool);
 
 #endif  /* GCC_PREDICT_H */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index be67102..8b989b4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-30  Jan Hubicka  <hubicka@ucw.cz>
+
+	* gcc.dg/tree-ssa/cunroll-12.c: New testcase.
+	* gcc.dg/tree-ssa/cunroll-13.c: New testcase.
+	* gcc.dg/tree-ssa/cunroll-14.c: New testcase.
+
 2016-05-30  Tom de Vries  <tom@codesourcery.com>
 
 	PR tree-optimization/69067
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c
new file mode 100644
index 0000000..0cc44f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-blocks-details" } */
+struct a {int a[8];int b;};
+void
+t(struct a *a)
+{
+  for (int i=0;a->a[i];i++)
+    a->a[i]++;
+}
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
new file mode 100644
index 0000000..6e4417a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdisable-tree-cunrolli -fdisable-tree-vrp1 -fdump-tree-cunroll-blocks-details" } */
+struct a {int a[8];int b;};
+void
+t(struct a *a)
+{
+  for (int i=0;i<123456 && a->a[i];i++)
+    a->a[i]++;
+}
+/* This pass relies on the fact that we do not eliminate the redundant test for i early.
+   It is necessary to disable all passes that do so.  At the moment it is vrp1 and cunrolli.  */
+/* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
new file mode 100644
index 0000000..4d27b18
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */
+struct a {int a[100];};
+void
+t(struct a *a)
+{
+  for (int i=0;i<5 && a->a[i];i++)
+    a->a[i]++;
+}
+/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index de447f4..4673d02 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -730,8 +730,14 @@ try_unroll_loop_completely (struct loop *loop,
       if (ul == UL_SINGLE_ITER)
 	return false;
 
+      /* EXIT can be removed only if we are sure it passes first N_UNROLL
+	 iterations.  */
+      bool remove_exit = (exit && niter
+			  && TREE_CODE (niter) == INTEGER_CST
+			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
+
       large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
+		 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
       ninsns = size.overall;
       if (large)
@@ -837,8 +843,20 @@ try_unroll_loop_completely (struct loop *loop,
 
       initialize_original_copy_tables ();
       wont_exit = sbitmap_alloc (n_unroll + 1);
-      bitmap_ones (wont_exit);
-      bitmap_clear_bit (wont_exit, 0);
+      if (exit && niter
+	  && TREE_CODE (niter) == INTEGER_CST
+	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
+	{
+	  bitmap_ones (wont_exit);
+	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
+	      || edge_to_cancel)
+	    bitmap_clear_bit (wont_exit, 0);
+	}
+      else
+	{
+	  exit = NULL;
+	  bitmap_clear (wont_exit);
+	}
 
       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 						 n_unroll, wont_exit,
@@ -869,6 +887,7 @@ try_unroll_loop_completely (struct loop *loop,
   if (edge_to_cancel)
     {
       gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
+      force_edge_cold (edge_to_cancel, true);
       if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 	gimple_cond_make_false (cond);
       else
@@ -1112,8 +1131,8 @@ canonicalize_loop_induction_variables (struct loop *loop,
   if (dump_file && (dump_flags & TDF_DETAILS)
       && likely_max_loop_iterations_int (loop) >= 0)
     {
-      fprintf (dump_file, "Loop likely %d iterates at most %i times.\n", loop->num,
-	       (int)likely_max_loop_iterations_int (loop));
+      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
+	       loop->num, (int)likely_max_loop_iterations_int (loop));
     }
 
   /* Remove exits that are known to be never taken based on loop bound.
author	Jan Hubicka <hubicka@ucw.cz>	2016-05-30 12:40:33 +0200
committer	Jan Hubicka <hubicka@gcc.gnu.org>	2016-05-30 10:40:33 +0000
commit	98bdbb39a6676776c528f3b51ce740669c06d708 (patch)
tree	70d2283e17b094627b83cbfbd240e80625f370dd /gcc
parent	d5cf3d8da6ef568929fc8bdfc1809593ff1988a8 (diff)
download	gcc-98bdbb39a6676776c528f3b51ce740669c06d708.zip gcc-98bdbb39a6676776c528f3b51ce740669c06d708.tar.gz gcc-98bdbb39a6676776c528f3b51ce740669c06d708.tar.bz2