diff options
author | Bill Schmidt <wschmidt@linux.vnet.ibm.com> | 2012-02-10 16:38:37 +0000 |
---|---|---|
committer | William Schmidt <wschmidt@gcc.gnu.org> | 2012-02-10 16:38:37 +0000 |
commit | 8bd373026eaf4fe6d6ce7c5047f10dd85e29b942 (patch) | |
tree | 1455fc4d37f458d4ad3a9c635dedf65878af17e6 /gcc/tree-vect-stmts.c | |
parent | 19f326e8ec1917f33e54e165e036f2c6ff091ce8 (diff) | |
download | gcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.zip gcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.tar.gz gcc-8bd373026eaf4fe6d6ce7c5047f10dd85e29b942.tar.bz2 |
re PR tree-optimization/50031 (Sphinx3 has a 10% regression going from GCC 4.5 to GCC 4.6 on powerpc)
2012-02-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Ira Rosen <irar@il.ibm.com>
PR tree-optimization/50031
* targhooks.c (default_builtin_vectorization_cost): Handle
vec_promote_demote.
* target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
* tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
all types of reduction and pattern statements.
(vect_estimate_min_profitable_iters): Likewise.
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
(vect_get_load_cost): Use vec_perm for permutations; add dump logic
for explicit realigns.
(vectorizable_conversion): Call vect_model_promotion_demotion_cost.
* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
vec_promote_demote.
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
vec_perm for VSX and handle vec_promote_demote.
Co-Authored-By: Ira Rosen <irar@il.ibm.com>
From-SVN: r184102
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- | gcc/tree-vect-stmts.c | 62 |
1 files changed, 56 insertions, 6 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index e854da5..20f10f3 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -811,6 +811,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, } +/* Model cost for type demotion and promotion operations. PWR is normally + zero for single-step promotions and demotions. It will be one if + two-step promotion/demotion is required, and so on. Each additional + step doubles the number of instructions required. */ + +static void +vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, + enum vect_def_type *dt, int pwr) +{ + int i, tmp; + int inside_cost = 0, outside_cost = 0, single_stmt_cost; + + /* The SLP costs were already calculated during SLP tree build. */ + if (PURE_SLP_STMT (stmt_info)) + return; + + single_stmt_cost = vect_get_stmt_cost (vec_promote_demote); + for (i = 0; i < pwr + 1; i++) + { + tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ? + (i + 1) : i; + inside_cost += vect_pow2 (tmp) * single_stmt_cost; + } + + /* FORNOW: Assuming maximum 2 args per stmts. */ + for (i = 0; i < 2; i++) + { + if (dt[i] == vect_constant_def || dt[i] == vect_external_def) + outside_cost += vect_get_stmt_cost (vector_stmt); + } + + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, " + "outside_cost = %d .", inside_cost, outside_cost); + + /* Set the costs in STMT_INFO. */ + stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost); + stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost); +} + /* Function vect_cost_strided_group_size For strided load or store, return the group_size only if it is the first @@ -887,7 +927,6 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .", group_size); - } /* Costs of the stores. */ @@ -1049,7 +1088,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, case dr_explicit_realign: { *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load) - + vect_get_stmt_cost (vector_stmt)); + + vect_get_stmt_cost (vec_perm)); /* FIXME: If the misalignment remains fixed across the iterations of the containing loop, the following cost should be added to the @@ -1057,6 +1096,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, if (targetm.vectorize.builtin_mask_for_load) *inside_cost += vect_get_stmt_cost (vector_stmt); + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, "vect_model_load_cost: explicit realign"); + break; } case dr_explicit_realign_optimized: @@ -1080,7 +1122,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies, } *inside_cost += ncopies * (vect_get_stmt_cost (vector_load) - + vect_get_stmt_cost (vector_stmt)); + + vect_get_stmt_cost (vec_perm)); + + if (vect_print_dump_info (REPORT_COST)) + fprintf (vect_dump, + "vect_model_load_cost: explicit realign optimized"); + break; } @@ -2392,16 +2439,19 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_conversion ==="); if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR) - STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; + { + STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; + vect_model_simple_cost (stmt_info, ncopies, dt, NULL); + } else if (modifier == NARROW) { STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; - vect_model_simple_cost (stmt_info, ncopies, dt, NULL); + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); } else { STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; - vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL); + vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt); } VEC_free (tree, heap, interm_types); return true; |