re PR tree-optimization/83887 ([graphite] ICE in verify_dominators, at dominance.c:1184 (error: dominator of 3 should be 21, not 18))

2018-01-18 Richard Biener <rguenther@suse.de> PR tree-optimization/83887 * graphite-scop-detection.c (scop_detection::get_nearest_dom_with_single_entry): Remove. (scop_detection::get_nearest_pdom_with_single_exit): Likewise. (scop_detection::merge_sese): Re-implement with a flood-fill algorithm that properly finds a SESE region if it exists. * gcc.dg/graphite/pr83887.c: New testcase. * gfortran.dg/graphite/pr83887.f90: Likewise. * gfortran.dg/graphite/pr83887.f: Likewise. From-SVN: r256841
author: Richard Biener <rguenther@suse.de> 2018-01-18 10:59:33 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2018-01-18 10:59:33 +0000
commit: 7467ab4232babb1ac9b906fe91abb9226464b884 (patch)
tree: 79560bae0f827facc7346ddfc7f8c7557c1211ea /gcc
parent: c5affc0451c9eb83c5142c726710cb8dbe04b66f (diff)
download: gcc-7467ab4232babb1ac9b906fe91abb9226464b884.zip
gcc-7467ab4232babb1ac9b906fe91abb9226464b884.tar.gz
gcc-7467ab4232babb1ac9b906fe91abb9226464b884.tar.bz2
6 files changed, 184 insertions, 148 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 577d19b..61b003a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-01-18  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/83887
+	* graphite-scop-detection.c
+	(scop_detection::get_nearest_dom_with_single_entry): Remove.
+	(scop_detection::get_nearest_pdom_with_single_exit): Likewise.
+	(scop_detection::merge_sese): Re-implement with a flood-fill
+	algorithm that properly finds a SESE region if it exists.
+
 2018-01-18  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c/61240
diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index 1f2f990..b1122c2 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -309,16 +309,6 @@ public:
 
   sese_l get_sese (loop_p loop);
 
-  /* Return the closest dominator with a single entry edge.  In case of a
-     back-loop the back-edge is not counted.  */
-
-  static edge get_nearest_dom_with_single_entry (basic_block dom);
-
-  /* Return the closest post-dominator with a single exit edge.  In case of a
-     back-loop the back-edge is not counted.  */
-
-  static edge get_nearest_pdom_with_single_exit (basic_block dom);
-
   /* Merge scops at same loop depth and returns the new sese.
      Returns a new SESE when merge was successful, INVALID_SESE otherwise.  */
 
@@ -441,85 +431,6 @@ scop_detection::get_sese (loop_p loop)
   return sese_l (scop_begin, scop_end);
 }
 
-/* Return the closest dominator with a single entry edge.  */
-
-edge
-scop_detection::get_nearest_dom_with_single_entry (basic_block dom)
-{
-  if (!dom->preds)
-    return NULL;
-
-  /* If any of the dominators has two predecessors but one of them is a back
-     edge, then that basic block also qualifies as a dominator with single
-     entry.  */
-  if (dom->preds->length () == 2)
-    {
-      /* If e1->src dominates e2->src then e1->src will also dominate dom.  */
-      edge e1 = (*dom->preds)[0];
-      edge e2 = (*dom->preds)[1];
-      loop_p l = dom->loop_father;
-      loop_p l1 = e1->src->loop_father;
-      loop_p l2 = e2->src->loop_father;
-      if (l != l1 && l == l2
-	  && dominated_by_p (CDI_DOMINATORS, e2->src, e1->src))
-	return e1;
-      if (l != l2 && l == l1
-	  && dominated_by_p (CDI_DOMINATORS, e1->src, e2->src))
-	return e2;
-    }
-
-  while (dom->preds->length () != 1)
-    {
-      if (dom->preds->length () < 1)
-	return NULL;
-      dom = get_immediate_dominator (CDI_DOMINATORS, dom);
-      if (!dom->preds)
-	return NULL;
-    }
-  return (*dom->preds)[0];
-}
-
-/* Return the closest post-dominator with a single exit edge.  In case of a
-   back-loop the back-edge is not counted.  */
-
-edge
-scop_detection::get_nearest_pdom_with_single_exit (basic_block pdom)
-{
-  if (!pdom->succs)
-    return NULL;
-
-  /* If any of the post-dominators has two successors but one of them is a back
-     edge, then that basic block also qualifies as a post-dominator with single
-     exit. */
-  if (pdom->succs->length () == 2)
-    {
-      /* If e1->dest post-dominates e2->dest then e1->dest will also
-	 post-dominate pdom.  */
-      edge e1 = (*pdom->succs)[0];
-      edge e2 = (*pdom->succs)[1];
-      loop_p l = pdom->loop_father;
-      loop_p l1 = e1->dest->loop_father;
-      loop_p l2 = e2->dest->loop_father;
-      if (l != l1 && l == l2
-	  && dominated_by_p (CDI_POST_DOMINATORS, e2->dest, e1->dest))
-	return e1;
-      if (l != l2 && l == l1
-	  && dominated_by_p (CDI_POST_DOMINATORS, e1->dest, e2->dest))
-	return e2;
-    }
-
-  while (pdom->succs->length () != 1)
-    {
-      if (pdom->succs->length () < 1)
-	return NULL;
-      pdom = get_immediate_dominator (CDI_POST_DOMINATORS, pdom);
-      if (!pdom->succs)
-	return NULL;
-    }
-
-  return (*pdom->succs)[0];
-}
-
 /* Merge scops at same loop depth and returns the new sese.
    Returns a new SESE when merge was successful, INVALID_SESE otherwise.  */
 
@@ -537,73 +448,78 @@ scop_detection::merge_sese (sese_l first, sese_l second) const
 	       dp << "[scop-detection] try merging sese s2: ";
 	       print_sese (dump_file, second));
 
-  /* Assumption: Both the sese's should be at the same loop depth or one scop
-     should subsume the other like in case of nested loops.  */
-
-  /* Find the common dominators for entry,
-     and common post-dominators for the exit.  */
-  basic_block dom = nearest_common_dominator (CDI_DOMINATORS,
-					      get_entry_bb (first),
-					      get_entry_bb (second));
-
-  edge entry = get_nearest_dom_with_single_entry (dom);
-
-  if (!entry || (entry->flags & EDGE_IRREDUCIBLE_LOOP))
-    return invalid_sese;
-
-  basic_block pdom = nearest_common_dominator (CDI_POST_DOMINATORS,
-					       get_exit_bb (first),
-					       get_exit_bb (second));
-  pdom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, pdom);
-
-  edge exit = get_nearest_pdom_with_single_exit (pdom);
-
-  if (!exit || (exit->flags & EDGE_IRREDUCIBLE_LOOP))
-    return invalid_sese;
-
-  sese_l combined (entry, exit);
-
-  DEBUG_PRINT (dp << "[scop-detection] checking combined sese: ";
-	       print_sese (dump_file, combined));
-
-  /* FIXME: We could iterate to find the dom which dominates pdom, and pdom
-     which post-dominates dom, until it stabilizes.  Also, ENTRY->SRC and
-     EXIT->DEST should be in the same loop nest.  */
-  if (!dominated_by_p (CDI_DOMINATORS, pdom, dom)
-      || entry->src->loop_father != exit->dest->loop_father)
-    return invalid_sese;
-
-  /* For now we just bail out when there is a loop exit in the region
-     that is not also the exit of the region.  We could enlarge the
-     region to cover the loop that region exits to.  See PR79977.  */
-  if (loop_outer (entry->src->loop_father))
+  auto_bitmap worklist, in_sese_region;
+  bitmap_set_bit (worklist, get_entry_bb (first)->index);
+  bitmap_set_bit (worklist, get_exit_bb (first)->index);
+  bitmap_set_bit (worklist, get_entry_bb (second)->index);
+  bitmap_set_bit (worklist, get_exit_bb (second)->index);
+  edge entry = NULL, exit = NULL;
+
+  /* We can optimize the case of adding a loop entry dest or exit
+     src to the worklist (for single-exit loops) by skipping
+     directly to the exit dest / entry src.  in_sese_region
+     doesn't have to cover all blocks in the region but merely
+     its border it acts more like a visited bitmap.  */
+  do
     {
-      vec<edge> exits = get_loop_exit_edges (entry->src->loop_father);
-      for (unsigned i = 0; i < exits.length (); ++i)
+      int index = bitmap_first_set_bit (worklist);
+      bitmap_clear_bit (worklist, index);
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, index);
+      edge_iterator ei;
+      edge e;
+
+      /* With fake exit edges we can end up with no possible exit.  */
+      if (index == EXIT_BLOCK)
 	{
-	  if (exits[i] != exit
-	      && bb_in_region (exits[i]->src, entry->dest, exit->src))
-	    {
-	      DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
-	      exits.release ();
-	      return invalid_sese;
-	    }
+	  DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
+	  return invalid_sese;
 	}
-      exits.release ();
+
+      bitmap_set_bit (in_sese_region, bb->index);
+         
+      basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
+      FOR_EACH_EDGE (e, ei, bb->preds)
+	if (e->src == dom
+	    && (! entry
+		|| dominated_by_p (CDI_DOMINATORS, entry->dest, bb)))
+	  {
+	    if (entry
+		&& ! bitmap_bit_p (in_sese_region, entry->src->index))
+	      bitmap_set_bit (worklist, entry->src->index);
+	    entry = e;
+	  }
+	else if (! bitmap_bit_p (in_sese_region, e->src->index))
+	  bitmap_set_bit (worklist, e->src->index);
+
+      basic_block pdom = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	if (e->dest == pdom
+	    && (! exit
+		|| dominated_by_p (CDI_POST_DOMINATORS, exit->src, bb)))
+	  {
+	    if (exit
+		&& ! bitmap_bit_p (in_sese_region, exit->dest->index))
+	      bitmap_set_bit (worklist, exit->dest->index);
+	    exit = e;
+	  }
+	else if (! bitmap_bit_p (in_sese_region, e->dest->index))
+	  bitmap_set_bit (worklist, e->dest->index);
     }
+  while (! bitmap_empty_p (worklist));
 
-  /* For now we just want to bail out when exit does not post-dominate entry.
-     TODO: We might just add a basic_block at the exit to make exit
-     post-dominate entry (the entire region).  */
-  if (!dominated_by_p (CDI_POST_DOMINATORS, get_entry_bb (combined),
-		       get_exit_bb (combined))
-      || !dominated_by_p (CDI_DOMINATORS, get_exit_bb (combined),
-			  get_entry_bb (combined)))
+  /* Include the BB with the loop-closed SSA PHI nodes.
+     canonicalize_loop_closed_ssa makes sure that is in proper shape.  */
+  if (exit->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
+      && loop_exit_edge_p (exit->src->loop_father, exit))
     {
-      DEBUG_PRINT (dp << "[scop-detection-fail] cannot merge seses.\n");
-      return invalid_sese;
+      gcc_assert (single_pred_p (exit->dest)
+		  && single_succ_p (exit->dest)
+		  && sese_trivially_empty_bb_p (exit->dest));
+      exit = single_succ_edge (exit->dest);
     }
 
+  sese_l combined (entry, exit);
+
   DEBUG_PRINT (dp << "[merged-sese] s1: "; print_sese (dump_file, combined));
 
   return combined;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b5a83f4..3cc5bda 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-01-18  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/83887
+	* gcc.dg/graphite/pr83887.c: New testcase.
+	* gfortran.dg/graphite/pr83887.f90: Likewise.
+	* gfortran.dg/graphite/pr83887.f: Likewise.
+
 2018-01-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
 	PR target/65578
diff --git a/gcc/testsuite/gcc.dg/graphite/pr83887.c b/gcc/testsuite/gcc.dg/graphite/pr83887.c
new file mode 100644
index 0000000..b230ec9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/pr83887.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -floop-nest-optimize -fno-tree-loop-im" } */
+
+int z4, g7;
+
+void
+x3 (int my)
+{
+  while (my < 2)
+    {
+      for (z4 = 0; z4 < 2; ++z4)
+	{
+	}
+
+      if (my != 0)
+	for (g7 = 0; g7 < 2; ++g7)
+	  {
+	  }
+
+      ++my;
+    }
+}
diff --git a/gcc/testsuite/gfortran.dg/graphite/pr83887.f b/gcc/testsuite/gfortran.dg/graphite/pr83887.f
new file mode 100644
index 0000000..f667222
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/pr83887.f
@@ -0,0 +1,23 @@
+! { dg-do compile }
+! { dg-options "-O2 -floop-nest-optimize" }
+      SUBROUTINE STONG(IGAUSS)
+      DIMENSION EXX(6)
+      PARAMETER (MXSH=1000, MXGTOT=5000)
+      COMMON /NSHEL / EX(MXGTOT),CS(MXGTOT),NSHELL
+  100 CONTINUE
+      NSHELL = NSHELL+1
+      IF(NSHELL.GT.MXSH) THEN
+         RETURN
+      END IF
+      DO 320 I = 1,IGAUSS
+         K = K1+I-1
+         EX(K) = EXX(I)*SCALE
+  320 CONTINUE
+      IF(TNORM.GT.TOLNRM) THEN
+         STOP
+      END IF
+      DO 460 IG = K1,K2
+         CS(IG) = FACS*CS(IG)
+  460 CONTINUE
+      GO TO 100
+      END
diff --git a/gcc/testsuite/gfortran.dg/graphite/pr83887.f90 b/gcc/testsuite/gfortran.dg/graphite/pr83887.f90
new file mode 100644
index 0000000..2f299cd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/pr83887.f90
@@ -0,0 +1,59 @@
+! { dg-do compile }
+! { dg-options "-O -floop-nest-optimize" }
+      SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, &
+                   B, LDB )
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      complex(kind((1.0d0,1.0d0)))         ALPHA
+      complex(kind((1.0d0,1.0d0)))         A( LDA, * ), B( LDB, * )
+      EXTERNAL           XERBLA
+      INTRINSIC          CONJG, MAX
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      complex(kind((1.0d0,1.0d0)))         TEMP
+      complex(kind((1.0d0,1.0d0)))         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      complex(kind((1.0d0,1.0d0)))         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+      LSIDE  =  scan( SIDE  , 'Ll' )>0
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ =  scan( TRANSA, 'Tt' )>0
+      NOUNIT =  scan( DIAG  , 'Nn' )>0
+      UPPER  =  scan( UPLO  , 'Uu' )>0
+      INFO   = 0
+      IF( N.EQ.0 ) &
+   RETURN
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT ) &
+                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT ) &
+                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+      RETURN
+      END
+
author	Richard Biener <rguenther@suse.de>	2018-01-18 10:59:33 +0000
committer	Richard Biener <rguenth@gcc.gnu.org>	2018-01-18 10:59:33 +0000
commit	7467ab4232babb1ac9b906fe91abb9226464b884 (patch)
tree	79560bae0f827facc7346ddfc7f8c7557c1211ea /gcc
parent	c5affc0451c9eb83c5142c726710cb8dbe04b66f (diff)
download	gcc-7467ab4232babb1ac9b906fe91abb9226464b884.zip gcc-7467ab4232babb1ac9b906fe91abb9226464b884.tar.gz gcc-7467ab4232babb1ac9b906fe91abb9226464b884.tar.bz2