1 files changed, 119 insertions, 30 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 6dc0c72..20c867c 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
   STMT_VINFO_IN_PATTERN_P (res) = false;
   STMT_VINFO_RELATED_STMT (res) = NULL;
   STMT_VINFO_DATA_REF (res) = NULL;
-  if (TREE_CODE (stmt) == PHI_NODE)
+  if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))
     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
   else
     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
@@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
 }
 
 
+/* Function bb_in_loop_p
+
+   Used as predicate for dfs order traversal of the loop bbs.  */
+
+static bool
+bb_in_loop_p (const_basic_block bb, const void *data)
+{
+  struct loop *loop = (struct loop *)data;
+  if (flow_bb_inside_loop_p (loop, bb))
+    return true;
+  return false;
+}
+
+
 /* Function new_loop_vec_info.
 
    Create and initialize a new loop_vec_info struct for LOOP, as well as
@@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop)
   loop_vec_info res;
   basic_block *bbs;
   block_stmt_iterator si;
-  unsigned int i;
+  unsigned int i, nbbs;
 
   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
+  LOOP_VINFO_LOOP (res) = loop;
 
   bbs = get_loop_body (loop);
 
-  /* Create stmt_info for all stmts in the loop.  */
+  /* Create/Update stmt_info for all stmts in the loop.  */
   for (i = 0; i < loop->num_nodes; i++)
     {
       basic_block bb = bbs[i];
       tree phi;
 
-      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
-        {
-          stmt_ann_t ann = get_stmt_ann (phi);
-          set_stmt_info (ann, new_stmt_vec_info (phi, res));
-        }
-
-      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+      /* BBs in a nested inner-loop will have been already processed (because 
+	 we will have called vect_analyze_loop_form for any nested inner-loop).
+	 Therefore, for stmts in an inner-loop we just want to update the 
+	 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new 
+	 loop_info of the outer-loop we are currently considering to vectorize 
+	 (instead of the loop_info of the inner-loop).
+	 For stmts in other BBs we need to create a stmt_info from scratch.  */
+      if (bb->loop_father != loop)
 	{
-	  tree stmt = bsi_stmt (si);
-	  stmt_ann_t ann;
+	  /* Inner-loop bb.  */
+	  gcc_assert (loop->inner && bb->loop_father == loop->inner);
+	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+	    {
+	      stmt_vec_info stmt_info = vinfo_for_stmt (phi);
+	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
+	      STMT_VINFO_LOOP_VINFO (stmt_info) = res;
+	    }
+	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+	   {
+	      tree stmt = bsi_stmt (si);
+	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
+	      STMT_VINFO_LOOP_VINFO (stmt_info) = res;
+	   }
+	}
+      else
+	{
+	  /* bb in current nest.  */
+	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+	    {
+	      stmt_ann_t ann = get_stmt_ann (phi);
+	      set_stmt_info (ann, new_stmt_vec_info (phi, res));
+	    }
 
-	  ann = stmt_ann (stmt);
-	  set_stmt_info (ann, new_stmt_vec_info (stmt, res));
+	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+	    {
+	      tree stmt = bsi_stmt (si);
+	      stmt_ann_t ann = stmt_ann (stmt);
+	      set_stmt_info (ann, new_stmt_vec_info (stmt, res));
+	    }
 	}
     }
 
-  LOOP_VINFO_LOOP (res) = loop;
+  /* CHECKME: We want to visit all BBs before their successors (except for 
+     latch blocks, for which this assertion wouldn't hold).  In the simple 
+     case of the loop forms we allow, a dfs order of the BBs would the same 
+     as reversed postorder traversal, so we are safe.  */
+
+   free (bbs);
+   bbs = XCNEWVEC (basic_block, loop->num_nodes);
+   nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, 
+			      bbs, loop->num_nodes, loop);
+   gcc_assert (nbbs == loop->num_nodes);
+
   LOOP_VINFO_BBS (res) = bbs;
-  LOOP_VINFO_EXIT_COND (res) = NULL;
   LOOP_VINFO_NITERS (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
@@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop)
    stmts in the loop.  */
 
 void
-destroy_loop_vec_info (loop_vec_info loop_vinfo)
+destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 {
   struct loop *loop;
   basic_block *bbs;
@@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
   bbs = LOOP_VINFO_BBS (loop_vinfo);
   nbbs = loop->num_nodes;
 
+  if (!clean_stmts)
+    {
+      free (LOOP_VINFO_BBS (loop_vinfo));
+      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+      VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+
+      free (loop_vinfo);
+      loop->aux = NULL;
+      return;
+    }
+
   for (j = 0; j < nbbs; j++)
     {
       basic_block bb = bbs[j];
@@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr)
     return dr_aligned;
 
   /* Possibly unaligned access.  */
-  
   if (DR_IS_READ (dr))
     {
       if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing
@@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
     {
     case PHI_NODE:
       *def = PHI_RESULT (*def_stmt);
-      gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
-		  || *dt == vect_invariant_def);
       break;
 
     case GIMPLE_MODIFY_STMT:
@@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
                                 enum tree_code *code1, enum tree_code *code2)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
   bool ordered_p;
   enum machine_mode vec_mode;
   enum insn_code icode1, icode2;
@@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
      Some targets can take advantage of this and generate more efficient code.
      For example, targets like Altivec, that support widen_mult using a sequence
      of {mult_even,mult_odd} generate the following vectors:
-        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].  */
+        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
+
+     When vectorizaing outer-loops, we execute the inner-loop sequentially
+     (each vectorized inner-loop iteration contributes to VF outer-loop 
+     iterations in parallel). We therefore don't allow to change the order 
+     of the computation in the inner-loop during outer-loop vectorization.  */
 
-   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
+   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+       && !nested_in_vect_loop_p (vect_loop, stmt))
      ordered_p = false;
    else
      ordered_p = true;
@@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code,
    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
 
 tree
-vect_is_simple_reduction (struct loop *loop, tree phi)
+vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
 {
+  struct loop *loop = (bb_for_stmt (phi))->loop_father;
+  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
   edge latch_e = loop_latch_edge (loop);
   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
   tree def_stmt, def1, def2;
@@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
   imm_use_iterator imm_iter;
   use_operand_p use_p;
 
+  gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
+
   name = PHI_RESULT (phi);
   nloop_uses = 0;
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
@@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
       return NULL_TREE;
     }
 
+  /* Generally, when vectorizing a reduction we change the order of the
+     computation.  This may change the behavior of the program in some
+     cases, so we need to check that this is ok.  One exception is when 
+     vectorizing an outer-loop: the inner-loop is executed sequentially,
+     and therefore vectorizing reductions in the inner-loop durint 
+     outer-loop vectorization is safe.  */
+
   /* CHECKME: check for !flag_finite_math_only too?  */
-  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
+  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations
+      && !nested_in_vect_loop_p (vect_loop, def_stmt)) 
     {
       /* Changing the order of operations changes the semantics.  */
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
         }
       return NULL_TREE;
     }
-  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
+  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
+	   && !nested_in_vect_loop_p (vect_loop, def_stmt))
     {
       /* Changing the order of operations changes the semantics.  */
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
 
 
   /* Check that one def is the reduction def, defined by PHI,
-     the other def is either defined in the loop by a GIMPLE_MODIFY_STMT,
-     or it's an induction (defined by some phi node).  */
+     the other def is either defined in the loop ("vect_loop_def"),
+     or it's an induction (defined by a loop-header phi-node).  */
 
   if (def2 == phi
       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
       && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT 
-	  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def))
+	  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
+	  || (TREE_CODE (def1) == PHI_NODE 
+	      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
+	      && !is_loop_header_bb_p (bb_for_stmt (def1)))))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         {
@@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
   else if (def1 == phi
 	   && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
 	   && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT 
-	       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def))
+	       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
+	       || (TREE_CODE (def2) == PHI_NODE
+		   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
+		   && !is_loop_header_bb_p (bb_for_stmt (def2)))))
     {
       /* Swap operands (just for simplicity - so that the rest of the code
 	 can assume that the reduction variable is always the last (second)
@@ -2340,7 +2429,7 @@ vectorize_loops (void)
       if (!loop)
 	continue;
       loop_vinfo = loop->aux;
-      destroy_loop_vec_info (loop_vinfo);
+      destroy_loop_vec_info (loop_vinfo, true);
       loop->aux = NULL;
     }