author | Thomas Schwinge <thomas@codesourcery.com> | 2022-02-03 21:12:21 +0100
committer | Thomas Schwinge <thomas@codesourcery.com> | 2022-02-03 21:14:10 +0100
commit | 7eef766dc5a8abda2ca2cf8d535cdf160f40b50c (patch)
tree | f85ed9010c56dc8f250d7cba5761b4eae58f2a42 /gcc/tree-ssa-loop-manip.cc
parent | 5199ecb8519c4c5f92160365cefe8e0aa1ca3873 (diff)
parent | ff7aeceb6b3a476c3bac66a7f39a5ef4240206fc (diff)
Merge commit 'ff7aeceb6b3a476c3bac66a7f39a5ef4240206fc' [#247, #906]
Diffstat (limited to 'gcc/tree-ssa-loop-manip.cc')
-rw-r--r-- | gcc/tree-ssa-loop-manip.cc | 1677
1 file changed, 1677 insertions, 0 deletions
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc new file mode 100644 index 0000000..770cbd2 --- /dev/null +++ b/gcc/tree-ssa-loop-manip.cc @@ -0,0 +1,1677 @@ +/* High-level loop manipulation functions. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "cfghooks.h" +#include "tree-pass.h" /* ??? for TODO_update_ssa but this isn't a pass. */ +#include "ssa.h" +#include "gimple-pretty-print.h" +#include "fold-const.h" +#include "cfganal.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimplify-me.h" +#include "tree-cfg.h" +#include "tree-ssa-loop-ivopts.h" +#include "tree-ssa-loop-manip.h" +#include "tree-ssa-loop-niter.h" +#include "tree-ssa-loop.h" +#include "tree-into-ssa.h" +#include "tree-ssa.h" +#include "cfgloop.h" +#include "tree-scalar-evolution.h" +#include "tree-inline.h" + +/* All bitmaps for rewriting into loop-closed SSA go on this obstack, + so that we can free them all at once. */ +static bitmap_obstack loop_renamer_obstack; + +/* Creates an induction variable with value BASE + STEP * iteration in LOOP. + It is expected that neither BASE nor STEP are shared with other expressions + (unless the sharing rules allow this). Use VAR as a base var_decl for it + (if NULL, a new temporary will be created). The increment will occur at + INCR_POS (after it if AFTER is true, before it otherwise). INCR_POS and + AFTER can be computed using standard_iv_increment_position. The ssa versions + of the variable before and after increment will be stored in VAR_BEFORE and + VAR_AFTER (unless they are NULL). */ + +void +create_iv (tree base, tree step, tree var, class loop *loop, + gimple_stmt_iterator *incr_pos, bool after, + tree *var_before, tree *var_after) +{ + gassign *stmt; + gphi *phi; + tree initial, step1; + gimple_seq stmts; + tree vb, va; + enum tree_code incr_op = PLUS_EXPR; + edge pe = loop_preheader_edge (loop); + + if (var != NULL_TREE) + { + vb = make_ssa_name (var); + va = make_ssa_name (var); + } + else + { + vb = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp"); + va = make_temp_ssa_name (TREE_TYPE (base), NULL, "ivtmp"); + } + if (var_before) + *var_before = vb; + if (var_after) + *var_after = va; + + /* For easier readability of the created code, produce MINUS_EXPRs + when suitable. 
*/ + if (TREE_CODE (step) == INTEGER_CST) + { + if (TYPE_UNSIGNED (TREE_TYPE (step))) + { + step1 = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); + if (tree_int_cst_lt (step1, step)) + { + incr_op = MINUS_EXPR; + step = step1; + } + } + else + { + bool ovf; + + if (!tree_expr_nonnegative_warnv_p (step, &ovf) + && may_negate_without_overflow_p (step)) + { + incr_op = MINUS_EXPR; + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); + } + } + } + if (POINTER_TYPE_P (TREE_TYPE (base))) + { + if (TREE_CODE (base) == ADDR_EXPR) + mark_addressable (TREE_OPERAND (base, 0)); + step = convert_to_ptrofftype (step); + if (incr_op == MINUS_EXPR) + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); + incr_op = POINTER_PLUS_EXPR; + } + /* Gimplify the step if necessary. We put the computations in front of the + loop (i.e. the step should be loop invariant). */ + step = force_gimple_operand (step, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (pe, stmts); + + stmt = gimple_build_assign (va, incr_op, vb, step); + /* Prevent the increment from inheriting a bogus location if it is not put + immediately after a statement whose location is known. */ + if (after) + { + if (gsi_end_p (*incr_pos) + || (is_gimple_debug (gsi_stmt (*incr_pos)) + && gsi_bb (*incr_pos) + && gsi_end_p (gsi_last_nondebug_bb (gsi_bb (*incr_pos))))) + { + edge e = single_succ_edge (gsi_bb (*incr_pos)); + gimple_set_location (stmt, e->goto_locus); + } + gsi_insert_after (incr_pos, stmt, GSI_NEW_STMT); + } + else + { + gimple_stmt_iterator gsi = *incr_pos; + if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi))) + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + gimple_set_location (stmt, gimple_location (gsi_stmt (gsi))); + gsi_insert_before (incr_pos, stmt, GSI_NEW_STMT); + } + + initial = force_gimple_operand (base, &stmts, true, var); + if (stmts) + gsi_insert_seq_on_edge_immediate (pe, stmts); + + phi = create_phi_node (vb, loop->header); + add_phi_arg (phi, initial, loop_preheader_edge (loop), UNKNOWN_LOCATION); + add_phi_arg (phi, va, loop_latch_edge (loop), UNKNOWN_LOCATION); +} + +/* Return the innermost superloop LOOP of USE_LOOP that is a superloop of + both DEF_LOOP and USE_LOOP. */ + +static inline class loop * +find_sibling_superloop (class loop *use_loop, class loop *def_loop) +{ + unsigned ud = loop_depth (use_loop); + unsigned dd = loop_depth (def_loop); + gcc_assert (ud > 0 && dd > 0); + if (ud > dd) + use_loop = superloop_at_depth (use_loop, dd); + if (ud < dd) + def_loop = superloop_at_depth (def_loop, ud); + while (loop_outer (use_loop) != loop_outer (def_loop)) + { + use_loop = loop_outer (use_loop); + def_loop = loop_outer (def_loop); + gcc_assert (use_loop && def_loop); + } + return use_loop; +} + +/* DEF_BB is a basic block containing a DEF that needs rewriting into + loop-closed SSA form. USE_BLOCKS is the set of basic blocks containing + uses of DEF that "escape" from the loop containing DEF_BB (i.e. blocks in + USE_BLOCKS are dominated by DEF_BB but not in the loop father of DEF_B). + ALL_EXITS[I] is the set of all basic blocks that exit loop I. + + Compute the subset of LOOP_EXITS that exit the loop containing DEF_BB + or one of its loop fathers, in which DEF is live. This set is returned + in the bitmap LIVE_EXITS. + + Instead of computing the complete livein set of the def, we use the loop + nesting tree as a form of poor man's structure analysis. 
This greatly + speeds up the analysis, which is important because this function may be + called on all SSA names that need rewriting, one at a time. */ + +static void +compute_live_loop_exits (bitmap live_exits, bitmap use_blocks, + bitmap *loop_exits, basic_block def_bb) +{ + unsigned i; + bitmap_iterator bi; + class loop *def_loop = def_bb->loop_father; + unsigned def_loop_depth = loop_depth (def_loop); + bitmap def_loop_exits; + + /* Normally the work list size is bounded by the number of basic + blocks in the largest loop. We don't know this number, but we + can be fairly sure that it will be relatively small. */ + auto_vec<basic_block> worklist (MAX (8, n_basic_blocks_for_fn (cfun) / 128)); + + EXECUTE_IF_SET_IN_BITMAP (use_blocks, 0, i, bi) + { + basic_block use_bb = BASIC_BLOCK_FOR_FN (cfun, i); + class loop *use_loop = use_bb->loop_father; + gcc_checking_assert (def_loop != use_loop + && ! flow_loop_nested_p (def_loop, use_loop)); + if (! flow_loop_nested_p (use_loop, def_loop)) + use_bb = find_sibling_superloop (use_loop, def_loop)->header; + if (bitmap_set_bit (live_exits, use_bb->index)) + worklist.safe_push (use_bb); + } + + /* Iterate until the worklist is empty. */ + while (! worklist.is_empty ()) + { + edge e; + edge_iterator ei; + + /* Pull a block off the worklist. */ + basic_block bb = worklist.pop (); + + /* Make sure we have at least enough room in the work list + for all predecessors of this block. */ + worklist.reserve (EDGE_COUNT (bb->preds)); + + /* For each predecessor block. */ + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred = e->src; + class loop *pred_loop = pred->loop_father; + unsigned pred_loop_depth = loop_depth (pred_loop); + bool pred_visited; + + /* We should have met DEF_BB along the way. */ + gcc_assert (pred != ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + if (pred_loop_depth >= def_loop_depth) + { + if (pred_loop_depth > def_loop_depth) + pred_loop = superloop_at_depth (pred_loop, def_loop_depth); + /* If we've reached DEF_LOOP, our train ends here. */ + if (pred_loop == def_loop) + continue; + } + else if (! flow_loop_nested_p (pred_loop, def_loop)) + pred = find_sibling_superloop (pred_loop, def_loop)->header; + + /* Add PRED to the LIVEIN set. PRED_VISITED is true if + we had already added PRED to LIVEIN before. */ + pred_visited = !bitmap_set_bit (live_exits, pred->index); + + /* If we have visited PRED before, don't add it to the worklist. + If BB dominates PRED, then we're probably looking at a loop. + We're only interested in looking up in the dominance tree + because DEF_BB dominates all the uses. */ + if (pred_visited || dominated_by_p (CDI_DOMINATORS, pred, bb)) + continue; + + worklist.quick_push (pred); + } + } + + def_loop_exits = BITMAP_ALLOC (&loop_renamer_obstack); + for (class loop *loop = def_loop; + loop != current_loops->tree_root; + loop = loop_outer (loop)) + bitmap_ior_into (def_loop_exits, loop_exits[loop->num]); + bitmap_and_into (live_exits, def_loop_exits); + BITMAP_FREE (def_loop_exits); +} + +/* Add a loop-closing PHI for VAR in basic block EXIT. */ + +static void +add_exit_phi (basic_block exit, tree var) +{ + gphi *phi; + edge e; + edge_iterator ei; + + /* Check that at least one of the edges entering the EXIT block exits + the loop, or a superloop of that loop, that VAR is defined in. 
*/ + if (flag_checking) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (var); + basic_block def_bb = gimple_bb (def_stmt); + FOR_EACH_EDGE (e, ei, exit->preds) + { + class loop *aloop = find_common_loop (def_bb->loop_father, + e->src->loop_father); + if (!flow_bb_inside_loop_p (aloop, e->dest)) + break; + } + gcc_assert (e); + } + + phi = create_phi_node (NULL_TREE, exit); + create_new_def_for (var, phi, gimple_phi_result_ptr (phi)); + FOR_EACH_EDGE (e, ei, exit->preds) + add_phi_arg (phi, var, e, UNKNOWN_LOCATION); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, ";; Created LCSSA PHI: "); + print_gimple_stmt (dump_file, phi, 0, dump_flags); + } +} + +/* Add exit phis for VAR that is used in LIVEIN. + Exits of the loops are stored in LOOP_EXITS. */ + +static void +add_exit_phis_var (tree var, bitmap use_blocks, bitmap *loop_exits) +{ + unsigned index; + bitmap_iterator bi; + basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); + bitmap live_exits = BITMAP_ALLOC (&loop_renamer_obstack); + + gcc_checking_assert (! bitmap_bit_p (use_blocks, def_bb->index)); + + compute_live_loop_exits (live_exits, use_blocks, loop_exits, def_bb); + + EXECUTE_IF_SET_IN_BITMAP (live_exits, 0, index, bi) + { + add_exit_phi (BASIC_BLOCK_FOR_FN (cfun, index), var); + } + + BITMAP_FREE (live_exits); +} + +/* Add exit phis for the names marked in NAMES_TO_RENAME. + Exits of the loops are stored in EXITS. Sets of blocks where the ssa + names are used are stored in USE_BLOCKS. */ + +static void +add_exit_phis (bitmap names_to_rename, bitmap *use_blocks, bitmap *loop_exits) +{ + unsigned i; + bitmap_iterator bi; + + EXECUTE_IF_SET_IN_BITMAP (names_to_rename, 0, i, bi) + { + add_exit_phis_var (ssa_name (i), use_blocks[i], loop_exits); + } +} + +/* Fill the array of bitmaps LOOP_EXITS with all loop exit edge targets. */ + +static void +get_loops_exits (bitmap *loop_exits) +{ + unsigned j; + edge e; + + for (auto loop : loops_list (cfun, 0)) + { + auto_vec<edge> exit_edges = get_loop_exit_edges (loop); + loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack); + FOR_EACH_VEC_ELT (exit_edges, j, e) + bitmap_set_bit (loop_exits[loop->num], e->dest->index); + } +} + +/* For USE in BB, if it is used outside of the loop it is defined in, + mark it for rewrite. Record basic block BB where it is used + to USE_BLOCKS. Record the ssa name index to NEED_PHIS bitmap. + Note that for USEs in phis, BB should be the src of the edge corresponding to + the use, rather than the bb containing the phi. */ + +static void +find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks, + bitmap need_phis) +{ + unsigned ver; + basic_block def_bb; + class loop *def_loop; + + if (TREE_CODE (use) != SSA_NAME) + return; + + ver = SSA_NAME_VERSION (use); + def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); + if (!def_bb) + return; + def_loop = def_bb->loop_father; + + /* If the definition is not inside a loop, it is not interesting. */ + if (!loop_outer (def_loop)) + return; + + /* If the use is not outside of the loop it is defined in, it is not + interesting. */ + if (flow_bb_inside_loop_p (def_loop, bb)) + return; + + /* If we're seeing VER for the first time, we still have to allocate + a bitmap for its uses. */ + if (bitmap_set_bit (need_phis, ver)) + use_blocks[ver] = BITMAP_ALLOC (&loop_renamer_obstack); + bitmap_set_bit (use_blocks[ver], bb->index); +} + +/* For uses matching USE_FLAGS in STMT, mark names that are used outside of the + loop they are defined to rewrite. 
Record the set of blocks in which the ssa + names are used to USE_BLOCKS, and the ssa names themselves to NEED_PHIS. */ + +static void +find_uses_to_rename_stmt (gimple *stmt, bitmap *use_blocks, bitmap need_phis, + int use_flags) +{ + ssa_op_iter iter; + tree var; + basic_block bb = gimple_bb (stmt); + + if (is_gimple_debug (stmt)) + return; + + /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows SSA_OP_VIRTUAL_USES + only. */ + if (use_flags == SSA_OP_VIRTUAL_USES) + { + tree vuse = gimple_vuse (stmt); + if (vuse != NULL_TREE) + find_uses_to_rename_use (bb, gimple_vuse (stmt), use_blocks, need_phis); + } + else + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, use_flags) + find_uses_to_rename_use (bb, var, use_blocks, need_phis); +} + +/* Marks names matching USE_FLAGS that are used in BB and outside of the loop + they are defined in for rewrite. Records the set of blocks in which the ssa + names are used to USE_BLOCKS. Record the SSA names that will + need exit PHIs in NEED_PHIS. */ + +static void +find_uses_to_rename_bb (basic_block bb, bitmap *use_blocks, bitmap need_phis, + int use_flags) +{ + edge e; + edge_iterator ei; + bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0; + bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0; + + FOR_EACH_EDGE (e, ei, bb->succs) + for (gphi_iterator bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi); + gsi_next (&bsi)) + { + gphi *phi = bsi.phi (); + bool virtual_p = virtual_operand_p (gimple_phi_result (phi)); + if ((virtual_p && do_virtuals) + || (!virtual_p && do_nonvirtuals)) + find_uses_to_rename_use (bb, PHI_ARG_DEF_FROM_EDGE (phi, e), + use_blocks, need_phis); + } + + for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); + gsi_next (&bsi)) + find_uses_to_rename_stmt (gsi_stmt (bsi), use_blocks, need_phis, + use_flags); +} + +/* Marks names matching USE_FLAGS that are used outside of the loop they are + defined in for rewrite. Records the set of blocks in which the ssa names are + used to USE_BLOCKS. Record the SSA names that will need exit PHIs in + NEED_PHIS. If CHANGED_BBS is not NULL, scan only blocks in this set. */ + +static void +find_uses_to_rename (bitmap changed_bbs, bitmap *use_blocks, bitmap need_phis, + int use_flags) +{ + basic_block bb; + unsigned index; + bitmap_iterator bi; + + if (changed_bbs) + EXECUTE_IF_SET_IN_BITMAP (changed_bbs, 0, index, bi) + { + bb = BASIC_BLOCK_FOR_FN (cfun, index); + if (bb) + find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); + } + else + FOR_EACH_BB_FN (bb, cfun) + find_uses_to_rename_bb (bb, use_blocks, need_phis, use_flags); +} + +/* Mark uses of DEF that are used outside of the loop they are defined in for + rewrite. Record the set of blocks in which the ssa names are used to + USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */ + +static void +find_uses_to_rename_def (tree def, bitmap *use_blocks, bitmap need_phis) +{ + gimple *use_stmt; + imm_use_iterator imm_iter; + + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) + { + if (is_gimple_debug (use_stmt)) + continue; + + basic_block use_bb = gimple_bb (use_stmt); + + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) + { + if (gimple_code (use_stmt) == GIMPLE_PHI) + { + edge e = gimple_phi_arg_edge (as_a <gphi *> (use_stmt), + PHI_ARG_INDEX_FROM_USE (use_p)); + use_bb = e->src; + } + find_uses_to_rename_use (use_bb, USE_FROM_PTR (use_p), use_blocks, + need_phis); + } + } +} + +/* Marks names matching USE_FLAGS that are defined in LOOP and used outside of + it for rewrite. 
Records the set of blocks in which the ssa names are used to + USE_BLOCKS. Record the SSA names that will need exit PHIs in NEED_PHIS. */ + +static void +find_uses_to_rename_in_loop (class loop *loop, bitmap *use_blocks, + bitmap need_phis, int use_flags) +{ + bool do_virtuals = (use_flags & SSA_OP_VIRTUAL_USES) != 0; + bool do_nonvirtuals = (use_flags & SSA_OP_USE) != 0; + int def_flags = ((do_virtuals ? SSA_OP_VIRTUAL_DEFS : 0) + | (do_nonvirtuals ? SSA_OP_DEF : 0)); + + + basic_block *bbs = get_loop_body (loop); + + for (unsigned int i = 0; i < loop->num_nodes; i++) + { + basic_block bb = bbs[i]; + + for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); + gsi_next (&bsi)) + { + gphi *phi = bsi.phi (); + tree res = gimple_phi_result (phi); + bool virtual_p = virtual_operand_p (res); + if ((virtual_p && do_virtuals) + || (!virtual_p && do_nonvirtuals)) + find_uses_to_rename_def (res, use_blocks, need_phis); + } + + for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); + gsi_next (&bsi)) + { + gimple *stmt = gsi_stmt (bsi); + /* FOR_EACH_SSA_TREE_OPERAND iterator does not allows + SSA_OP_VIRTUAL_DEFS only. */ + if (def_flags == SSA_OP_VIRTUAL_DEFS) + { + tree vdef = gimple_vdef (stmt); + if (vdef != NULL) + find_uses_to_rename_def (vdef, use_blocks, need_phis); + } + else + { + tree var; + ssa_op_iter iter; + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, def_flags) + find_uses_to_rename_def (var, use_blocks, need_phis); + } + } + } + + XDELETEVEC (bbs); +} + +/* Rewrites the program into a loop closed ssa form -- i.e. inserts extra + phi nodes to ensure that no variable is used outside the loop it is + defined in. + + This strengthening of the basic ssa form has several advantages: + + 1) Updating it during unrolling/peeling/versioning is trivial, since + we do not need to care about the uses outside of the loop. + The same applies to virtual operands which are also rewritten into + loop closed SSA form. Note that virtual operands are always live + until function exit. + 2) The behavior of all uses of an induction variable is the same. + Without this, you need to distinguish the case when the variable + is used outside of the loop it is defined in, for example + + for (i = 0; i < 100; i++) + { + for (j = 0; j < 100; j++) + { + k = i + j; + use1 (k); + } + use2 (k); + } + + Looking from the outer loop with the normal SSA form, the first use of k + is not well-behaved, while the second one is an induction variable with + base 99 and step 1. + + If LOOP is non-null, only rewrite uses that have defs in LOOP. Otherwise, + if CHANGED_BBS is not NULL, we look for uses outside loops only in the + basic blocks in this set. + + USE_FLAGS allows us to specify whether we want virtual, non-virtual or + both variables rewritten. + + UPDATE_FLAG is used in the call to update_ssa. See + TODO_update_ssa* for documentation. */ + +void +rewrite_into_loop_closed_ssa_1 (bitmap changed_bbs, unsigned update_flag, + int use_flags, class loop *loop) +{ + bitmap *use_blocks; + bitmap names_to_rename; + + loops_state_set (LOOP_CLOSED_SSA); + if (number_of_loops (cfun) <= 1) + return; + + /* If the pass has caused the SSA form to be out-of-date, update it + now. */ + if (update_flag != 0) + update_ssa (update_flag); + else if (flag_checking) + verify_ssa (true, true); + + bitmap_obstack_initialize (&loop_renamer_obstack); + + names_to_rename = BITMAP_ALLOC (&loop_renamer_obstack); + + /* Uses of names to rename. 
We don't have to initialize this array, + because we know that we will only have entries for the SSA names + in NAMES_TO_RENAME. */ + use_blocks = XNEWVEC (bitmap, num_ssa_names); + + if (loop != NULL) + { + gcc_assert (changed_bbs == NULL); + find_uses_to_rename_in_loop (loop, use_blocks, names_to_rename, + use_flags); + } + else + { + gcc_assert (loop == NULL); + find_uses_to_rename (changed_bbs, use_blocks, names_to_rename, use_flags); + } + + if (!bitmap_empty_p (names_to_rename)) + { + /* An array of bitmaps where LOOP_EXITS[I] is the set of basic blocks + that are the destination of an edge exiting loop number I. */ + bitmap *loop_exits = XNEWVEC (bitmap, number_of_loops (cfun)); + get_loops_exits (loop_exits); + + /* Add the PHI nodes on exits of the loops for the names we need to + rewrite. */ + add_exit_phis (names_to_rename, use_blocks, loop_exits); + + free (loop_exits); + + /* Fix up all the names found to be used outside their original + loops. */ + update_ssa (TODO_update_ssa); + } + + bitmap_obstack_release (&loop_renamer_obstack); + free (use_blocks); +} + +/* Rewrites the non-virtual defs and uses into a loop closed ssa form. If + CHANGED_BBS is not NULL, we look for uses outside loops only in the basic + blocks in this set. UPDATE_FLAG is used in the call to update_ssa. See + TODO_update_ssa* for documentation. */ + +void +rewrite_into_loop_closed_ssa (bitmap changed_bbs, unsigned update_flag) +{ + rewrite_into_loop_closed_ssa_1 (changed_bbs, update_flag, SSA_OP_USE, NULL); +} + +/* Rewrites virtual defs and uses with def in LOOP into loop closed ssa + form. */ + +void +rewrite_virtuals_into_loop_closed_ssa (class loop *loop) +{ + rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_VIRTUAL_USES, loop); +} + +/* Check invariants of the loop closed ssa form for the def in DEF_BB. */ + +static void +check_loop_closed_ssa_def (basic_block def_bb, tree def) +{ + use_operand_p use_p; + imm_use_iterator iterator; + FOR_EACH_IMM_USE_FAST (use_p, iterator, def) + { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + + basic_block use_bb = gimple_bb (USE_STMT (use_p)); + if (is_a <gphi *> (USE_STMT (use_p))) + use_bb = EDGE_PRED (use_bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; + + gcc_assert (flow_bb_inside_loop_p (def_bb->loop_father, use_bb)); + } +} + +/* Checks invariants of loop closed ssa form in BB. */ + +static void +check_loop_closed_ssa_bb (basic_block bb) +{ + for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); + gsi_next (&bsi)) + { + gphi *phi = bsi.phi (); + + if (!virtual_operand_p (PHI_RESULT (phi))) + check_loop_closed_ssa_def (bb, PHI_RESULT (phi)); + } + + for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); !gsi_end_p (bsi); + gsi_next_nondebug (&bsi)) + { + ssa_op_iter iter; + tree var; + gimple *stmt = gsi_stmt (bsi); + + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) + check_loop_closed_ssa_def (bb, var); + } +} + +/* Checks that invariants of the loop closed ssa form are preserved. + Call verify_ssa when VERIFY_SSA_P is true. Note all loops are checked + if LOOP is NULL, otherwise, only LOOP is checked. 
*/ + +DEBUG_FUNCTION void +verify_loop_closed_ssa (bool verify_ssa_p, class loop *loop) +{ + if (number_of_loops (cfun) <= 1) + return; + + if (verify_ssa_p) + verify_ssa (false, true); + + timevar_push (TV_VERIFY_LOOP_CLOSED); + + if (loop == NULL) + { + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + if (bb->loop_father && bb->loop_father->num > 0) + check_loop_closed_ssa_bb (bb); + } + else + { + basic_block *bbs = get_loop_body (loop); + + for (unsigned i = 0; i < loop->num_nodes; ++i) + check_loop_closed_ssa_bb (bbs[i]); + + free (bbs); + } + + timevar_pop (TV_VERIFY_LOOP_CLOSED); +} + +/* Split loop exit edge EXIT. The things are a bit complicated by a need to + preserve the loop closed ssa form. If COPY_CONSTANTS_P is true then + forwarder PHIs are also created for constant arguments. + The newly created block is returned. */ + +basic_block +split_loop_exit_edge (edge exit, bool copy_constants_p) +{ + basic_block dest = exit->dest; + basic_block bb = split_edge (exit); + gphi *phi, *new_phi; + tree new_name, name; + use_operand_p op_p; + gphi_iterator psi; + location_t locus; + + for (psi = gsi_start_phis (dest); !gsi_end_p (psi); gsi_next (&psi)) + { + phi = psi.phi (); + op_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (bb)); + locus = gimple_phi_arg_location_from_edge (phi, single_succ_edge (bb)); + + name = USE_FROM_PTR (op_p); + + /* If the argument of the PHI node is a constant, we do not need + to keep it inside loop. */ + if (TREE_CODE (name) != SSA_NAME + && !copy_constants_p) + continue; + + /* Otherwise create an auxiliary phi node that will copy the value + of the SSA name out of the loop. */ + new_name = duplicate_ssa_name (PHI_RESULT (phi), NULL); + new_phi = create_phi_node (new_name, bb); + add_phi_arg (new_phi, name, exit, locus); + SET_USE (op_p, new_name); + } + + return bb; +} + +/* Returns the basic block in that statements should be emitted for induction + variables incremented at the end of the LOOP. */ + +basic_block +ip_end_pos (class loop *loop) +{ + return loop->latch; +} + +/* Returns the basic block in that statements should be emitted for induction + variables incremented just before exit condition of a LOOP. */ + +basic_block +ip_normal_pos (class loop *loop) +{ + gimple *last; + basic_block bb; + edge exit; + + if (!single_pred_p (loop->latch)) + return NULL; + + bb = single_pred (loop->latch); + last = last_stmt (bb); + if (!last + || gimple_code (last) != GIMPLE_COND) + return NULL; + + exit = EDGE_SUCC (bb, 0); + if (exit->dest == loop->latch) + exit = EDGE_SUCC (bb, 1); + + if (flow_bb_inside_loop_p (loop, exit->dest)) + return NULL; + + return bb; +} + +/* Stores the standard position for induction variable increment in LOOP + (just before the exit condition if it is available and latch block is empty, + end of the latch block otherwise) to BSI. INSERT_AFTER is set to true if + the increment should be inserted after *BSI. */ + +void +standard_iv_increment_position (class loop *loop, gimple_stmt_iterator *bsi, + bool *insert_after) +{ + basic_block bb = ip_normal_pos (loop), latch = ip_end_pos (loop); + gimple *last = last_stmt (latch); + + if (!bb + || (last && gimple_code (last) != GIMPLE_LABEL)) + { + *bsi = gsi_last_bb (latch); + *insert_after = true; + } + else + { + *bsi = gsi_last_bb (bb); + *insert_after = false; + } +} + +/* Copies phi node arguments for duplicated blocks. The index of the first + duplicated block is FIRST_NEW_BLOCK. 
*/ + +static void +copy_phi_node_args (unsigned first_new_block) +{ + unsigned i; + + for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) + BASIC_BLOCK_FOR_FN (cfun, i)->flags |= BB_DUPLICATED; + + for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) + add_phi_args_after_copy_bb (BASIC_BLOCK_FOR_FN (cfun, i)); + + for (i = first_new_block; i < (unsigned) last_basic_block_for_fn (cfun); i++) + BASIC_BLOCK_FOR_FN (cfun, i)->flags &= ~BB_DUPLICATED; +} + + +/* The same as cfgloopmanip.cc:duplicate_loop_body_to_header_edge, but also + updates the PHI nodes at start of the copied region. In order to + achieve this, only loops whose exits all lead to the same location + are handled. + + Notice that we do not completely update the SSA web after + duplication. The caller is responsible for calling update_ssa + after the loop has been duplicated. */ + +bool +gimple_duplicate_loop_body_to_header_edge (class loop *loop, edge e, + unsigned int ndupl, + sbitmap wont_exit, edge orig, + vec<edge> *to_remove, int flags) +{ + unsigned first_new_block; + + if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES)) + return false; + if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS)) + return false; + + first_new_block = last_basic_block_for_fn (cfun); + if (!duplicate_loop_body_to_header_edge (loop, e, ndupl, wont_exit, orig, + to_remove, flags)) + return false; + + /* Readd the removed phi args for e. */ + flush_pending_stmts (e); + + /* Copy the phi node arguments. */ + copy_phi_node_args (first_new_block); + + scev_reset (); + + return true; +} + +/* Returns true if we can unroll LOOP FACTOR times. Number + of iterations of the loop is returned in NITER. */ + +bool +can_unroll_loop_p (class loop *loop, unsigned factor, + class tree_niter_desc *niter) +{ + edge exit; + + /* Check whether unrolling is possible. We only want to unroll loops + for that we are able to determine number of iterations. We also + want to split the extra iterations of the loop from its end, + therefore we require that the loop has precisely one + exit. */ + + exit = single_dom_exit (loop); + if (!exit) + return false; + + if (!number_of_iterations_exit (loop, exit, niter, false) + || niter->cmp == ERROR_MARK + /* Scalar evolutions analysis might have copy propagated + the abnormal ssa names into these expressions, hence + emitting the computations based on them during loop + unrolling might create overlapping life ranges for + them, and failures in out-of-ssa. */ + || contains_abnormal_ssa_name_p (niter->may_be_zero) + || contains_abnormal_ssa_name_p (niter->control.base) + || contains_abnormal_ssa_name_p (niter->control.step) + || contains_abnormal_ssa_name_p (niter->bound)) + return false; + + /* And of course, we must be able to duplicate the loop. */ + if (!can_duplicate_loop_p (loop)) + return false; + + /* The final loop should be small enough. */ + if (tree_num_loop_insns (loop, &eni_size_weights) * factor + > (unsigned) param_max_unrolled_insns) + return false; + + return true; +} + +/* Determines the conditions that control execution of LOOP unrolled FACTOR + times. DESC is number of iterations of LOOP. ENTER_COND is set to + condition that must be true if the main loop can be entered. + If the loop does not always iterate an exact multiple of FACTOR times, + EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing + how the exit from the unrolled loop should be controlled. Otherwise, + the trees are set to null and EXIT_CMP is set to ERROR_MARK. 
*/ + +static void +determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, + unsigned factor, tree *enter_cond, + tree *exit_base, tree *exit_step, + enum tree_code *exit_cmp, tree *exit_bound) +{ + gimple_seq stmts; + tree base = desc->control.base; + tree step = desc->control.step; + tree bound = desc->bound; + tree type = TREE_TYPE (step); + tree bigstep, delta; + tree min = lower_bound_in_type (type, type); + tree max = upper_bound_in_type (type, type); + enum tree_code cmp = desc->cmp; + tree cond = boolean_true_node, assum; + + /* For pointers, do the arithmetics in the type of step. */ + base = fold_convert (type, base); + bound = fold_convert (type, bound); + + *enter_cond = boolean_false_node; + *exit_base = NULL_TREE; + *exit_step = NULL_TREE; + *exit_cmp = ERROR_MARK; + *exit_bound = NULL_TREE; + gcc_assert (cmp != ERROR_MARK); + + /* We only need to be correct when we answer question + "Do at least FACTOR more iterations remain?" in the unrolled loop. + Thus, transforming BASE + STEP * i <> BOUND to + BASE + STEP * i < BOUND is ok. */ + if (cmp == NE_EXPR) + { + if (tree_int_cst_sign_bit (step)) + cmp = GT_EXPR; + else + cmp = LT_EXPR; + } + else if (cmp == LT_EXPR) + { + gcc_assert (!tree_int_cst_sign_bit (step)); + } + else if (cmp == GT_EXPR) + { + gcc_assert (tree_int_cst_sign_bit (step)); + } + else + gcc_unreachable (); + + /* The main body of the loop may be entered iff: + + 1) desc->may_be_zero is false. + 2) it is possible to check that there are at least FACTOR iterations + of the loop, i.e., BOUND - step * FACTOR does not overflow. + 3) # of iterations is at least FACTOR */ + + if (!integer_zerop (desc->may_be_zero)) + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + invert_truthvalue (desc->may_be_zero), + cond); + + bigstep = fold_build2 (MULT_EXPR, type, step, + build_int_cst_type (type, factor)); + delta = fold_build2 (MINUS_EXPR, type, bigstep, step); + if (cmp == LT_EXPR) + assum = fold_build2 (GE_EXPR, boolean_type_node, + bound, + fold_build2 (PLUS_EXPR, type, min, delta)); + else + assum = fold_build2 (LE_EXPR, boolean_type_node, + bound, + fold_build2 (PLUS_EXPR, type, max, delta)); + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); + + bound = fold_build2 (MINUS_EXPR, type, bound, delta); + assum = fold_build2 (cmp, boolean_type_node, base, bound); + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); + + if (integer_nonzerop (cond) + && integer_zerop (desc->may_be_zero)) + { + /* Convert the latch count to an iteration count. */ + tree niter = fold_build2 (PLUS_EXPR, type, desc->niter, + build_one_cst (type)); + if (multiple_of_p (type, niter, bigstep)) + return; + } + + cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + /* cond now may be a gimple comparison, which would be OK, but also any + other gimple rhs (say a && b). In this case we need to force it to + operand. 
*/ + if (!is_gimple_condexpr (cond)) + { + cond = force_gimple_operand (cond, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + } + *enter_cond = cond; + + base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + + *exit_base = base; + *exit_step = bigstep; + *exit_cmp = cmp; + *exit_bound = bound; +} + +/* Scales the frequencies of all basic blocks in LOOP that are strictly + dominated by BB by NUM/DEN. */ + +static void +scale_dominated_blocks_in_loop (class loop *loop, basic_block bb, + profile_count num, profile_count den) +{ + basic_block son; + + if (!den.nonzero_p () && !(num == profile_count::zero ())) + return; + + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + if (!flow_bb_inside_loop_p (loop, son)) + continue; + scale_bbs_frequencies_profile_count (&son, 1, num, den); + scale_dominated_blocks_in_loop (loop, son, num, den); + } +} + +/* Return estimated niter for LOOP after unrolling by FACTOR times. */ + +gcov_type +niter_for_unrolled_loop (class loop *loop, unsigned factor) +{ + gcc_assert (factor != 0); + bool profile_p = false; + gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p); + /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the + "+ 1" converts latch iterations to loop iterations and the "- 1" + converts back. */ + gcov_type new_est_niter = est_niter / factor; + + if (est_niter == -1) + return -1; + + /* Without profile feedback, loops for which we do not know a better estimate + are assumed to roll 10 times. When we unroll such loop, it appears to + roll too little, and it may even seem to be cold. To avoid this, we + ensure that the created loop appears to roll at least 5 times (but at + most as many times as before unrolling). Don't do adjustment if profile + feedback is present. */ + if (new_est_niter < 5 && !profile_p) + { + if (est_niter < 5) + new_est_niter = est_niter; + else + new_est_niter = 5; + } + + if (loop->any_upper_bound) + { + /* As above, this is really CEIL (upper_bound + 1, factor) - 1. */ + widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound, + factor); + if (wi::ltu_p (bound, new_est_niter)) + new_est_niter = bound.to_uhwi (); + } + + return new_est_niter; +} + +/* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge + whose source block dominates the latch. DESC describes the number of + iterations of LOOP. + + If N is number of iterations of the loop and MAY_BE_ZERO is the condition + under that loop exits in the first iteration even if N != 0, + + while (1) + { + x = phi (init, next); + + pre; + if (st) + break; + post; + } + + becomes (with possibly the exit conditions formulated a bit differently, + avoiding the need to create a new iv): + + if (MAY_BE_ZERO || N < FACTOR) + goto rest; + + do + { + x = phi (init, next); + + pre; + post; + pre; + post; + ... + pre; + post; + N -= FACTOR; + + } while (N >= FACTOR); + + rest: + init' = phi (init, x); + + while (1) + { + x = phi (init', next); + + pre; + if (st) + break; + post; + } + + Before the loop is unrolled, TRANSFORM is called for it (only for the + unrolled loop, but not for its versioned copy). DATA is passed to + TRANSFORM. 
*/ + +/* Probability in % that the unrolled loop is entered. Just a guess. */ +#define PROB_UNROLLED_LOOP_ENTERED 90 + +void +tree_transform_and_unroll_loop (class loop *loop, unsigned factor, + class tree_niter_desc *desc, + transform_callback transform, + void *data) +{ + gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor); + unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; + + enum tree_code exit_cmp; + tree enter_main_cond, exit_base, exit_step, exit_bound; + determine_exit_conditions (loop, desc, factor, + &enter_main_cond, &exit_base, &exit_step, + &exit_cmp, &exit_bound); + bool single_loop_p = !exit_base; + + /* Let us assume that the unrolled loop is quite likely to be entered. */ + profile_probability prob_entry; + if (integer_nonzerop (enter_main_cond)) + prob_entry = profile_probability::always (); + else + prob_entry = profile_probability::guessed_always () + .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100); + + gcond *exit_if = nullptr; + class loop *new_loop = nullptr; + edge new_exit; + if (!single_loop_p) + { + edge exit = single_dom_exit (loop); + + /* The values for scales should keep profile consistent, and somewhat + close to correct. + + TODO: The current value of SCALE_REST makes it appear that the loop + that is created by splitting the remaining iterations of the unrolled + loop is executed the same number of times as the original loop, and + with the same frequencies, which is obviously wrong. This does not + appear to cause problems, so we do not bother with fixing it for now. + To make the profile correct, we would need to change the probability + of the exit edge of the loop, and recompute the distribution of + frequencies in its body because of this change (scale the frequencies + of blocks before and after the exit by appropriate factors). */ + profile_probability scale_unrolled = prob_entry; + new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry, + prob_entry.invert (), scale_unrolled, + profile_probability::guessed_always (), + true); + gcc_assert (new_loop != NULL); + update_ssa (TODO_update_ssa); + + /* Prepare the cfg and update the phi nodes. Move the loop exit to the + loop latch (and make its condition dummy, for the moment). */ + basic_block rest = loop_preheader_edge (new_loop)->src; + edge precond_edge = single_pred_edge (rest); + split_edge (loop_latch_edge (loop)); + basic_block exit_bb = single_pred (loop->latch); + + /* Since the exit edge will be removed, the frequency of all the blocks + in the loop that are dominated by it must be scaled by + 1 / (1 - exit->probability). */ + if (exit->probability.initialized_p ()) + scale_dominated_blocks_in_loop (loop, exit->src, + /* We are scaling up here so + probability does not fit. */ + loop->header->count, + loop->header->count + - loop->header->count.apply_probability + (exit->probability)); + + gimple_stmt_iterator bsi = gsi_last_bb (exit_bb); + exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node, + integer_zero_node, + NULL_TREE, NULL_TREE); + + gsi_insert_after (&bsi, exit_if, GSI_NEW_STMT); + new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr); + rescan_loop_exit (new_exit, true, false); + + /* Set the probability of new exit to the same of the old one. Fix + the frequency of the latch block, by scaling it back by + 1 - exit->probability. 
*/ + new_exit->probability = exit->probability; + edge new_nonexit = single_pred_edge (loop->latch); + new_nonexit->probability = exit->probability.invert (); + new_nonexit->flags = EDGE_TRUE_VALUE; + if (new_nonexit->probability.initialized_p ()) + scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability); + + edge old_entry = loop_preheader_edge (loop); + edge new_entry = loop_preheader_edge (new_loop); + edge old_latch = loop_latch_edge (loop); + for (gphi_iterator psi_old_loop = gsi_start_phis (loop->header), + psi_new_loop = gsi_start_phis (new_loop->header); + !gsi_end_p (psi_old_loop); + gsi_next (&psi_old_loop), gsi_next (&psi_new_loop)) + { + gphi *phi_old_loop = psi_old_loop.phi (); + gphi *phi_new_loop = psi_new_loop.phi (); + + tree init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry); + use_operand_p op + = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry); + gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); + tree next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch); + + /* Prefer using original variable as a base for the new ssa name. + This is necessary for virtual ops, and useful in order to avoid + losing debug info for real ops. */ + tree new_init; + if (TREE_CODE (next) == SSA_NAME + && useless_type_conversion_p (TREE_TYPE (next), + TREE_TYPE (init))) + new_init = copy_ssa_name (next); + else if (TREE_CODE (init) == SSA_NAME + && useless_type_conversion_p (TREE_TYPE (init), + TREE_TYPE (next))) + new_init = copy_ssa_name (init); + else if (useless_type_conversion_p (TREE_TYPE (next), + TREE_TYPE (init))) + new_init = make_temp_ssa_name (TREE_TYPE (next), NULL, + "unrinittmp"); + else + new_init = make_temp_ssa_name (TREE_TYPE (init), NULL, + "unrinittmp"); + + gphi *phi_rest = create_phi_node (new_init, rest); + add_phi_arg (phi_rest, init, precond_edge, UNKNOWN_LOCATION); + add_phi_arg (phi_rest, next, new_exit, UNKNOWN_LOCATION); + SET_USE (op, new_init); + } + + remove_path (exit); + } + else + new_exit = single_dom_exit (loop); + + /* Transform the loop. */ + if (transform) + (*transform) (loop, data); + + /* Unroll the loop and remove the exits in all iterations except for the + last one. */ + auto_sbitmap wont_exit (factor); + bitmap_ones (wont_exit); + bitmap_clear_bit (wont_exit, factor - 1); + + auto_vec<edge> to_remove; + bool ok + = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop), + factor - 1, wont_exit, + new_exit, &to_remove, + DLTHE_FLAG_UPDATE_FREQ); + gcc_assert (ok); + + for (edge e : to_remove) + { + ok = remove_path (e); + gcc_assert (ok); + } + update_ssa (TODO_update_ssa); + + new_exit = single_dom_exit (loop); + if (!single_loop_p) + { + /* Ensure that the frequencies in the loop match the new estimated + number of iterations, and change the probability of the new + exit edge. */ + + profile_count freq_h = loop->header->count; + profile_count freq_e = (loop_preheader_edge (loop))->count (); + if (freq_h.nonzero_p ()) + { + /* Avoid dropping loop body profile counter to 0 because of zero + count in loop's preheader. 
*/ + if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ())) + freq_e = freq_e.force_nonzero (); + scale_loop_frequencies (loop, freq_e.probability_in (freq_h)); + } + + basic_block rest = new_exit->dest; + new_exit->probability = profile_probability::always () + .apply_scale (1, new_est_niter + 1); + + rest->count += new_exit->count (); + + edge new_nonexit = single_pred_edge (loop->latch); + profile_probability prob = new_nonexit->probability; + new_nonexit->probability = new_exit->probability.invert (); + prob = new_nonexit->probability / prob; + if (prob.initialized_p ()) + scale_bbs_frequencies (&loop->latch, 1, prob); + + /* Finally create the new counter for number of iterations and add + the new exit instruction. */ + tree ctr_before, ctr_after; + gimple_stmt_iterator bsi = gsi_last_nondebug_bb (new_exit->src); + exit_if = as_a <gcond *> (gsi_stmt (bsi)); + create_iv (exit_base, exit_step, NULL_TREE, loop, + &bsi, false, &ctr_before, &ctr_after); + gimple_cond_set_code (exit_if, exit_cmp); + gimple_cond_set_lhs (exit_if, ctr_after); + gimple_cond_set_rhs (exit_if, exit_bound); + update_stmt (exit_if); + } + else + { + /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's + original profile counts in line with the unroll factor. However, + the old counts might not have been consistent with the old + iteration count. + + Therefore, if the iteration count is known exactly, make sure that the + profile counts of the loop header (and any other blocks that might be + executed in the final iteration) are consistent with the combination + of (a) the incoming profile count and (b) the new iteration count. */ + profile_count in_count = loop_preheader_edge (loop)->count (); + profile_count old_header_count = loop->header->count; + if (in_count.nonzero_p () + && old_header_count.nonzero_p () + && TREE_CODE (desc->niter) == INTEGER_CST) + { + /* The + 1 converts latch counts to iteration counts. */ + profile_count new_header_count + = (in_count.apply_scale (new_est_niter + 1, 1)); + basic_block *body = get_loop_body (loop); + scale_bbs_frequencies_profile_count (body, loop->num_nodes, + new_header_count, + old_header_count); + free (body); + } + + /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1 + exit edges and adjusted the loop body's profile counts for the + new probabilities of the remaining non-exit edges. However, + the remaining exit edge still has the same probability as it + did before, even though it is now more likely. + + Therefore, all blocks executed after a failed exit test now have + a profile count that is too high, and the sum of the profile counts + for the header's incoming edges is greater than the profile count + of the header itself. + + Adjust the profile counts of all code in the loop body after + the exit test so that the sum of the counts on entry to the + header agree. */ + profile_count old_latch_count = loop_latch_edge (loop)->count (); + profile_count new_latch_count = loop->header->count - in_count; + if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ()) + scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count, + old_latch_count); + + /* Set the probability of the exit edge based on NEW_EST_NITER + (which estimates latch counts rather than iteration counts). + Update the probabilities of other edges to match. 
+ + If the profile counts are large enough to give the required + precision, the updates above will have made + + e->dest->count / e->src->count ~= new e->probability + + for every outgoing edge e of NEW_EXIT->src. */ + profile_probability new_exit_prob = profile_probability::always () + .apply_scale (1, new_est_niter + 1); + change_edge_frequency (new_exit, new_exit_prob); + } + + checking_verify_flow_info (); + checking_verify_loop_structure (); + checking_verify_loop_closed_ssa (true, loop); + checking_verify_loop_closed_ssa (true, new_loop); +} + +/* Wrapper over tree_transform_and_unroll_loop for case we do not + want to transform the loop before unrolling. The meaning + of the arguments is the same as for tree_transform_and_unroll_loop. */ + +void +tree_unroll_loop (class loop *loop, unsigned factor, + class tree_niter_desc *desc) +{ + tree_transform_and_unroll_loop (loop, factor, desc, NULL, NULL); +} + +/* Rewrite the phi node at position PSI in function of the main + induction variable MAIN_IV and insert the generated code at GSI. */ + +static void +rewrite_phi_with_iv (loop_p loop, + gphi_iterator *psi, + gimple_stmt_iterator *gsi, + tree main_iv) +{ + affine_iv iv; + gassign *stmt; + gphi *phi = psi->phi (); + tree atype, mtype, val, res = PHI_RESULT (phi); + + if (virtual_operand_p (res) || res == main_iv) + { + gsi_next (psi); + return; + } + + if (!simple_iv (loop, loop, res, &iv, true)) + { + gsi_next (psi); + return; + } + + remove_phi_node (psi, false); + + atype = TREE_TYPE (res); + mtype = POINTER_TYPE_P (atype) ? sizetype : atype; + val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step), + fold_convert (mtype, main_iv)); + val = fold_build2 (POINTER_TYPE_P (atype) + ? POINTER_PLUS_EXPR : PLUS_EXPR, + atype, unshare_expr (iv.base), val); + val = force_gimple_operand_gsi (gsi, val, false, NULL_TREE, true, + GSI_SAME_STMT); + stmt = gimple_build_assign (res, val); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); +} + +/* Rewrite all the phi nodes of LOOP in function of the main induction + variable MAIN_IV. */ + +static void +rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv) +{ + unsigned i; + basic_block *bbs = get_loop_body_in_dom_order (loop); + gphi_iterator psi; + + for (i = 0; i < loop->num_nodes; i++) + { + basic_block bb = bbs[i]; + gimple_stmt_iterator gsi = gsi_after_labels (bb); + + if (bb->loop_father != loop) + continue; + + for (psi = gsi_start_phis (bb); !gsi_end_p (psi); ) + rewrite_phi_with_iv (loop, &psi, &gsi, main_iv); + } + + free (bbs); +} + +/* Bases all the induction variables in LOOP on a single induction variable + (with base 0 and step 1), whose final value is compared with *NIT. When the + IV type precision has to be larger than *NIT type precision, *NIT is + converted to the larger type, the conversion code is inserted before the + loop, and *NIT is updated to the new definition. When BUMP_IN_LATCH is true, + the induction variable is incremented in the loop latch, otherwise it is + incremented in the loop header. Return the induction variable that was + created. 
*/ + +tree +canonicalize_loop_ivs (class loop *loop, tree *nit, bool bump_in_latch) +{ + unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit)); + unsigned original_precision = precision; + tree type, var_before; + gimple_stmt_iterator gsi; + gphi_iterator psi; + gcond *stmt; + edge exit = single_dom_exit (loop); + gimple_seq stmts; + bool unsigned_p = false; + + for (psi = gsi_start_phis (loop->header); + !gsi_end_p (psi); gsi_next (&psi)) + { + gphi *phi = psi.phi (); + tree res = PHI_RESULT (phi); + bool uns; + + type = TREE_TYPE (res); + if (virtual_operand_p (res) + || (!INTEGRAL_TYPE_P (type) + && !POINTER_TYPE_P (type)) + || TYPE_PRECISION (type) < precision) + continue; + + uns = POINTER_TYPE_P (type) | TYPE_UNSIGNED (type); + + if (TYPE_PRECISION (type) > precision) + unsigned_p = uns; + else + unsigned_p |= uns; + + precision = TYPE_PRECISION (type); + } + + scalar_int_mode mode = smallest_int_mode_for_size (precision); + precision = GET_MODE_PRECISION (mode); + type = build_nonstandard_integer_type (precision, unsigned_p); + + if (original_precision != precision + || TYPE_UNSIGNED (TREE_TYPE (*nit)) != unsigned_p) + { + *nit = fold_convert (type, *nit); + *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + } + + if (bump_in_latch) + gsi = gsi_last_bb (loop->latch); + else + gsi = gsi_last_nondebug_bb (loop->header); + create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE, + loop, &gsi, bump_in_latch, &var_before, NULL); + + rewrite_all_phi_nodes_with_iv (loop, var_before); + + stmt = as_a <gcond *> (last_stmt (exit->src)); + /* Make the loop exit if the control condition is not satisfied. */ + if (exit->flags & EDGE_TRUE_VALUE) + { + edge te, fe; + + extract_true_false_edges_from_block (exit->src, &te, &fe); + te->flags = EDGE_FALSE_VALUE; + fe->flags = EDGE_TRUE_VALUE; + } + gimple_cond_set_code (stmt, LT_EXPR); + gimple_cond_set_lhs (stmt, var_before); + gimple_cond_set_rhs (stmt, *nit); + update_stmt (stmt); + + return var_before; +} |
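As a usage illustration (not part of this commit), here is a minimal, hypothetical sketch of how a pass could drive the unrolling entry points defined in the file above, can_unroll_loop_p and tree_unroll_loop, assuming loop structures and loop-closed SSA are already available; the wrapper function name and the fixed factor of 4 are invented for the example:

/* Hypothetical illustration only: unroll each innermost loop four times
   when can_unroll_loop_p says the iteration count is analyzable and the
   unrolled body stays within the size limit.  Both entry points come from
   gcc/tree-ssa-loop-manip.cc above.  */
static void
example_unroll_innermost_loops (void)
{
  for (auto loop : loops_list (cfun, 0))
    {
      if (loop->inner)
	/* Only consider innermost loops in this sketch.  */
	continue;

      class tree_niter_desc desc;
      if (can_unroll_loop_p (loop, 4, &desc))
	/* Unroll by 4 with no extra per-copy transformation.  */
	tree_unroll_loop (loop, 4, &desc);
    }
}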