aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-stmts.c
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2012-02-10 16:38:37 +0000
committerWilliam Schmidt <wschmidt@gcc.gnu.org>2012-02-10 16:38:37 +0000
commit8bd373026eaf4fe6d6ce7c5047f10dd85e29b942 (patch)
tree1455fc4d37f458d4ad3a9c635dedf65878af17e6 /gcc/tree-vect-stmts.c
parent19f326e8ec1917f33e54e165e036f2c6ff091ce8 (diff)
downloadgcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.zip
gcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.tar.gz
gcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.tar.bz2
re PR tree-optimization/50031 (Sphinx3 has a 10% regression going from GCC 4.5 to GCC 4.6 on powerpc)
2012-02-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com> Ira Rosen <irar@il.ibm.com> PR tree-optimization/50031 * targhooks.c (default_builtin_vectorization_cost): Handle vec_promote_demote. * target.h (enum vect_cost_for_stmt): Add vec_promote_demote. * tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle all types of reduction and pattern statements. (vect_estimate_min_profitable_iters): Likewise. * tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function. (vect_get_load_cost): Use vec_perm for permutations; add dump logic for explicit realigns. (vectorizable_conversion): Call vect_model_promotion_demotion_cost. * config/spu/spu.c (spu_builtin_vectorization_cost): Handle vec_promote_demote. * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update vec_perm for VSX and handle vec_promote_demote. Co-Authored-By: Ira Rosen <irar@il.ibm.com> From-SVN: r184102
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r--gcc/tree-vect-stmts.c62
1 files changed, 56 insertions, 6 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e854da5..20f10f3 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -811,6 +811,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
}
+/* Model cost for type demotion and promotion operations and record it in
+ STMT_INFO (nothing is recorded for pure SLP stmts). PWR is normally zero for single-step promotions and demotions. It will be one if
+ two-step promotion/demotion is required, and so on. Each additional step doubles the number of instructions required.
+ STMT_VINFO_TYPE (STMT_INFO) must be set before the call, and DT must have at least two entries since DT[0] and DT[1] are both read. */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+ enum vect_def_type *dt, int pwr)
+{
+ int i, tmp;
+ int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+ /* The SLP costs were already calculated during SLP tree build, so leave STMT_INFO untouched. */
+ if (PURE_SLP_STMT (stmt_info))
+ return;
+
+ single_stmt_cost = vect_get_stmt_cost (vec_promote_demote); /* Cost of one promote/demote stmt. */
+ for (i = 0; i < pwr + 1; i++) /* One iteration per conversion step: PWR + 1 in total. */
+ {
+ tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+ (i + 1) : i; /* Promotion is charged one power of two more per step than any other type. */
+ inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+ }
+
+ /* FORNOW: Assuming maximum 2 args per stmt. Each constant or external def among DT[0] and DT[1] adds one vector_stmt to the outside cost. */
+ for (i = 0; i < 2; i++)
+ {
+ if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+ outside_cost += vect_get_stmt_cost (vector_stmt);
+ }
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+ "outside_cost = %d .", inside_cost, outside_cost);
+
+ /* Set the inside-of-loop and outside-of-loop costs in STMT_INFO. */
+ stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+ stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
/* Function vect_cost_strided_group_size
For strided load or store, return the group_size only if it is the first
@@ -887,7 +927,6 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
group_size);
-
}
/* Costs of the stores. */
@@ -1049,7 +1088,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
case dr_explicit_realign:
{
*inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
- + vect_get_stmt_cost (vector_stmt));
+ + vect_get_stmt_cost (vec_perm));
/* FIXME: If the misalignment remains fixed across the iterations of
the containing loop, the following cost should be added to the
@@ -1057,6 +1096,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
if (targetm.vectorize.builtin_mask_for_load)
*inside_cost += vect_get_stmt_cost (vector_stmt);
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
break;
}
case dr_explicit_realign_optimized:
@@ -1080,7 +1122,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
}
*inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
- + vect_get_stmt_cost (vector_stmt));
+ + vect_get_stmt_cost (vec_perm));
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump,
+ "vect_model_load_cost: explicit realign optimized");
+
break;
}
@@ -2392,16 +2439,19 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vectorizable_conversion ===");
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
- STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ {
+ STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+ }
else if (modifier == NARROW)
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
}
else
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
- vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
}
VEC_free (tree, heap, interm_types);
return true;