target.h (builtin_vectorization_cost): Add new target builtin.

2007-07-12 Dorit Nuzman <dorit@il.ibm.com> * target.h (builtin_vectorization_cost): Add new target builtin. * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New. * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New. (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New. * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize uninitialized variables. * tree-vect-transform.c (cost_for_stmt): New function. (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of using cost 1 for all scalar stmts. Be less conservative when estimating the number of prologue/epilogue iterations. Call targetm.vectorize.builtin_vectorization_cost. Return min_profitable_iters-1. (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for initialization cost instead of TARG_VEC_STMT_COST. Use TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction epilogue code. Fix epilogue cost computation. * config/spu/spu.c (spu_builtin_vectorization_cost): New. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement. * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST): (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST): (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC, TARG_VEC_LOAD_COST): (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define. 2007-07-12 Dorit Nuzman <dorit@il.ibm.com> * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New. * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New. 
* gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New. * lib/target-supports.exp (check_effective_target_vect_int_mul): Add spu. From-SVN: r126584
author: Dorit Nuzman <dorit@il.ibm.com> 2007-07-12 12:17:03 +0000
committer: Dorit Nuzman <dorit@gcc.gnu.org> 2007-07-12 12:17:03 +0000
commit: e95b59d2abbeea533bfcea315ed2b47412f21470 (patch)
tree: 85414467a7c6b6b6755c5dbe54511f8981c7db54 /gcc/tree-vect-transform.c
parent: e1c8221962aa8dfba5b2462449bccfe10c2d561e (diff)
download: gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.zip
gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.tar.gz
gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.tar.bz2
1 files changed, 70 insertions, 13 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 8a6e086..193c549 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
 static int vect_min_worthwhile_factor (enum tree_code);
 
 
+static int
+cost_for_stmt (tree stmt)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  /* Return the scalar cost of STMT, selected by its STMT_VINFO_TYPE.  */
+  switch (STMT_VINFO_TYPE (stmt_info))
+  {
+  case load_vec_info_type:
+    return TARG_SCALAR_LOAD_COST;
+  case store_vec_info_type:
+    return TARG_SCALAR_STORE_COST;
+  case op_vec_info_type:
+  case condition_vec_info_type:
+  case assignment_vec_info_type:
+  case reduc_vec_info_type:
+  case induc_vec_info_type:
+  case type_promotion_vec_info_type:
+  case type_demotion_vec_info_type:
+  case type_conversion_vec_info_type:
+  case call_vec_info_type:
+    return TARG_SCALAR_STMT_COST;  /* Every kind listed above shares one cost.  */
+  case undef_vec_info_type:
+  default:
+    gcc_unreachable ();  /* An undef or unlisted kind is not expected here.  */
+  }
+}
+
+
 /* Function vect_estimate_min_profitable_iters
 
    Return the number of iterations required for the vector version of the
@@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
           if (!STMT_VINFO_RELEVANT_P (stmt_info)
               && !STMT_VINFO_LIVE_P (stmt_info))
             continue;
-          scalar_single_iter_cost++;
+          scalar_single_iter_cost += cost_for_stmt (stmt);
           vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
           vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
         }
@@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
      loop.
 
      FORNOW: If we dont know the value of peel_iters for prologue or epilogue
-     at compile-time - we assume the worst.  
+     at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
 
      TODO: Build an expression that represents peel_iters for prologue and
      epilogue to be used in a run-time test.  */
@@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
   if (byte_misalign < 0)
     {
-      peel_iters_prologue = vf - 1;
+      peel_iters_prologue = (vf - 1)/2;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "cost model: "
-                 "prologue peel iters set conservatively.");
+                 "prologue peel iters set to (vf-1)/2.");
 
       /* If peeling for alignment is unknown, loop bound of main loop becomes
          unknown.  */
-      peel_iters_epilogue = vf - 1;
+      peel_iters_epilogue = (vf - 1)/2;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "cost model: "
-                 "epilogue peel iters set conservatively because "
+                 "epilogue peel iters set to (vf-1)/2 because "
                  "peeling for alignment is unknown .");
     }
   else 
@@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 
       if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
         {
-          peel_iters_epilogue = vf - 1;
+          peel_iters_epilogue = (vf - 1)/2;
           if (vect_print_dump_info (REPORT_DETAILS))
             fprintf (vect_dump, "cost model: "
-                     "epilogue peel iters set conservatively because "
+                     "epilogue peel iters set to (vf-1)/2 because "
                      "loop iterations are unknown .");
         }
       else      
@@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
   vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                       + (peel_iters_epilogue * scalar_single_iter_cost);
 
+  /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
+     information we provide for the target is whether testing against the
+     threshold involves a runtime test.  */
+  if (targetm.vectorize.builtin_vectorization_cost)
+    {
+      bool runtime_test = false;
+
+      /* If the number of iterations is unknown, or the
+	 peeling-for-misalignment amount is unknown, we will have to generate
+	 a runtime test to test the loop count against the threshold.  */
+      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+	  || (byte_misalign < 0))
+	runtime_test = true;
+      vec_outside_cost +=
+	targetm.vectorize.builtin_vectorization_cost (runtime_test);
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
+		  targetm.vectorize.builtin_vectorization_cost (runtime_test));
+    }
+
   /* Calculate number of iterations required to make the vector version 
      profitable, relative to the loop bodies only. The following condition
      must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
@@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 	       min_profitable_iters < vf ? vf : min_profitable_iters);
     }
 
-  return min_profitable_iters < vf ? vf : min_profitable_iters;
+  min_profitable_iters = 
+	min_profitable_iters < vf ? vf : min_profitable_iters;
+
+  /* Because the condition we create is:
+     if (niters <= min_profitable_iters)
+       then skip the vectorized loop.  */
+  min_profitable_iters--;
+  return min_profitable_iters;
 }
 
 
@@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
   code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
 
   /* Add in cost for initial definition.  */
-  outer_cost += TARG_VEC_STMT_COST;
+  outer_cost += TARG_SCALAR_TO_VEC_COST;
 
   /* Determine cost of epilogue code.
 
@@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
       optab = optab_for_tree_code (code, vectype);
 
       /* We have a whole vector shift available.  */
-      if (!VECTOR_MODE_P (mode) 
-          || optab->handlers[mode].insn_code == CODE_FOR_nothing)
+      if (VECTOR_MODE_P (mode)
+	  && optab->handlers[mode].insn_code != CODE_FOR_nothing
+	  && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
         /* Final reduction via vector shifts and the reduction operator. Also
            requires scalar extract.  */
-	outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST); 
+	outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
+			+ TARG_VEC_TO_SCALAR_COST); 
       else
 	/* Use extracts and reduction op for final reduction.  For N elements,
            we have N extracts and N-1 reduction ops.  */
author	Dorit Nuzman <dorit@il.ibm.com>	2007-07-12 12:17:03 +0000
committer	Dorit Nuzman <dorit@gcc.gnu.org>	2007-07-12 12:17:03 +0000
commit	e95b59d2abbeea533bfcea315ed2b47412f21470 (patch)
tree	85414467a7c6b6b6755c5dbe54511f8981c7db54 /gcc/tree-vect-transform.c
parent	e1c8221962aa8dfba5b2462449bccfe10c2d561e (diff)
download	gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.zip gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.tar.gz gcc-e95b59d2abbeea533bfcea315ed2b47412f21470.tar.bz2