aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-03 07:14:07 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-03 07:14:07 +0000
commitd9f21f6acb3aa615834e855e16b6311cd18c5668 (patch)
tree8200866be9cf8f8f2c40f1a190b6dc611888dd8b /gcc/tree-vect-loop.c
parentfba05d9e9a0321c812ddbda7b4caa3977e1db4ef (diff)
downloadgcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.zip
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.gz
gcc-d9f21f6acb3aa615834e855e16b6311cd18c5668.tar.bz2
poly_int: vectoriser vf and uf
This patch changes the type of the vectorisation factor and SLP unrolling factor to poly_uint64. This in turn required some knock-on changes in signedness elsewhere. Cost decisions are generally based on estimated_poly_value, which for VF is wrapped up as vect_vf_for_cost. The patch doesn't on its own enable variable-length vectorisation. It just makes the minimum changes necessary for the code to build with the new VF and UF types. Later patches also make the vectoriser cope with variable TYPE_VECTOR_SUBPARTS and variable GET_MODE_NUNITS, at which point the code really does handle variable-length vectors. The patch also changes MAX_VECTORIZATION_FACTOR to INT_MAX, to avoid hard-coding a particular architectural limit. The patch includes a new test because a development version of the patch accidentally used file print routines instead of dump_*, which would fail with -fopt-info. 2018-01-03 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * tree-vectorizer.h (_slp_instance::unrolling_factor): Change from an unsigned int to a poly_uint64. (_loop_vec_info::slp_unrolling_factor): Likewise. (_loop_vec_info::vectorization_factor): Change from an int to a poly_uint64. (MAX_VECTORIZATION_FACTOR): Bump from 64 to INT_MAX. (vect_get_num_vectors): New function. (vect_update_max_nunits, vect_vf_for_cost): Likewise. (vect_get_num_copies): Use vect_get_num_vectors. (vect_analyze_data_ref_dependences): Change max_vf from an int * to an unsigned int *. (vect_analyze_data_refs): Change min_vf from an int * to a poly_uint64 *. (vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather than an unsigned HOST_WIDE_INT. * tree-vect-data-refs.c (vect_analyze_possibly_independent_ddr) (vect_analyze_data_ref_dependence): Change max_vf from an int * to an unsigned int *. (vect_analyze_data_ref_dependences): Likewise. (vect_compute_data_ref_alignment): Handle polynomial vf. (vect_enhance_data_refs_alignment): Likewise. (vect_prune_runtime_alias_test_list): Likewise. (vect_shift_permute_load_chain): Likewise. (vect_supportable_dr_alignment): Likewise. (dependence_distance_ge_vf): Take the vectorization factor as a poly_uint64 rather than an unsigned HOST_WIDE_INT. (vect_analyze_data_refs): Change min_vf from an int * to a poly_uint64 *. * tree-vect-loop-manip.c (vect_gen_scalar_loop_niters): Take vfm1 as a poly_uint64 rather than an int. Make the same change for the returned bound_scalar. (vect_gen_vector_loop_niters): Handle polynomial vf. (vect_do_peeling): Likewise. Update call to vect_gen_scalar_loop_niters and handle polynomial bound_scalars. (vect_gen_vector_loop_niters_mult_vf): Assert that the vf must be constant. * tree-vect-loop.c (vect_determine_vectorization_factor) (vect_update_vf_for_slp, vect_analyze_loop_2): Handle polynomial vf. (vect_get_known_peeling_cost): Likewise. (vect_estimate_min_profitable_iters, vectorizable_reduction): Likewise. (vect_worthwhile_without_simd_p, vectorizable_induction): Likewise. (vect_transform_loop): Likewise. Use the lowest possible VF when updating the upper bounds of the loop. (vect_min_worthwhile_factor): Make static. Return an unsigned int rather than an int. * tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Cope with polynomial unroll factors. (vect_analyze_slp_cost_1, vect_analyze_slp_instance): Likewise. (vect_make_slp_decision): Likewise. (vect_supported_load_permutation_p): Likewise, and polynomial vf too. (vect_analyze_slp_cost): Handle polynomial vf. (vect_slp_analyze_node_operations): Likewise. (vect_slp_analyze_bb_1): Likewise. (vect_transform_slp_perm_load): Take the vf as a poly_uint64 rather than an unsigned HOST_WIDE_INT. * tree-vect-stmts.c (vectorizable_simd_clone_call, vectorizable_store) (vectorizable_load): Handle polynomial vf. * tree-vectorizer.c (simduid_to_vf::vf): Change from an int to a poly_uint64. (adjust_simduid_builtins, shrink_simd_arrays): Update accordingly. gcc/testsuite/ * gcc.dg/vect-opt-info-1.c: New test. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256126
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--gcc/tree-vect-loop.c223
1 files changed, 124 insertions, 99 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index ddb8841..4c57297 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -185,11 +185,10 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
unsigned nbbs = loop->num_nodes;
- unsigned int vectorization_factor = 0;
+ poly_uint64 vectorization_factor = 1;
tree scalar_type = NULL_TREE;
gphi *phi;
tree vectype;
- unsigned int nunits;
stmt_vec_info stmt_info;
unsigned i;
HOST_WIDE_INT dummy;
@@ -258,14 +257,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
dump_printf (MSG_NOTE, "\n");
}
- nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
- nunits);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "nunits = " HOST_WIDE_INT_PRINT_DEC "\n",
+ TYPE_VECTOR_SUBPARTS (vectype));
- if (!vectorization_factor
- || (nunits > vectorization_factor))
- vectorization_factor = nunits;
+ vect_update_max_nunits (&vectorization_factor, vectype);
}
}
@@ -553,12 +550,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
dump_printf (MSG_NOTE, "\n");
}
- nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
- if (!vectorization_factor
- || (nunits > vectorization_factor))
- vectorization_factor = nunits;
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "nunits = " HOST_WIDE_INT_PRINT_DEC "\n",
+ TYPE_VECTOR_SUBPARTS (vf_vectype));
+
+ vect_update_max_nunits (&vectorization_factor, vf_vectype);
if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
{
@@ -570,9 +567,13 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
/* TODO: Analyze cost. Decide if worth while to vectorize. */
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
- vectorization_factor);
- if (vectorization_factor <= 1)
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
+ dump_dec (MSG_NOTE, vectorization_factor);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ if (known_le (vectorization_factor, 1U))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1564,7 +1565,7 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- unsigned int vectorization_factor;
+ poly_uint64 vectorization_factor;
int i;
if (dump_enabled_p ())
@@ -1572,7 +1573,7 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
"=== vect_update_vf_for_slp ===\n");
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- gcc_assert (vectorization_factor != 0);
+ gcc_assert (known_ne (vectorization_factor, 0U));
/* If all the stmts in the loop can be SLPed, we perform only SLP, and
vectorization factor of the loop is the unrolling factor required by
@@ -1612,16 +1613,22 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
{
dump_printf_loc (MSG_NOTE, vect_location,
"Loop contains SLP and non-SLP stmts\n");
+ /* Both the vectorization factor and unroll factor have the form
+ current_vector_size * X for some rational X, so they must have
+ a common multiple. */
vectorization_factor
- = least_common_multiple (vectorization_factor,
+ = force_common_multiple (vectorization_factor,
LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
}
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Updating vectorization factor to %d\n",
- vectorization_factor);
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Updating vectorization factor to ");
+ dump_dec (MSG_NOTE, vectorization_factor);
+ dump_printf (MSG_NOTE, ".\n");
+ }
}
/* Function vect_analyze_loop_operations.
@@ -1792,8 +1799,8 @@ static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
{
bool ok;
- int max_vf = MAX_VECTORIZATION_FACTOR;
- int min_vf = 2;
+ unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
+ poly_uint64 min_vf = 2;
unsigned int n_stmts = 0;
/* The first group of checks is independent of the vector size. */
@@ -1918,7 +1925,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
if (!ok
- || max_vf < min_vf)
+ || (max_vf != MAX_VECTORIZATION_FACTOR
+ && maybe_lt (max_vf, min_vf)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1935,7 +1943,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
"can't determine vectorization factor.\n");
return false;
}
- if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ if (max_vf != MAX_VECTORIZATION_FACTOR
+ && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1946,7 +1955,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
/* Compute the scalar iteration cost. */
vect_compute_single_scalar_iteration_cost (loop_vinfo);
- int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
HOST_WIDE_INT estimated_niter;
unsigned th;
int min_scalar_loop_bound;
@@ -1971,21 +1980,25 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
start_over:
/* Now the vectorization factor is final. */
- unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- gcc_assert (vectorization_factor != 0);
+ poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ gcc_assert (known_ne (vectorization_factor, 0U));
+ unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vectorization_factor = %d, niters = "
- HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
- LOOP_VINFO_INT_NITERS (loop_vinfo));
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vectorization_factor = ");
+ dump_dec (MSG_NOTE, vectorization_factor);
+ dump_printf (MSG_NOTE, ", niters = " HOST_WIDE_INT_PRINT_DEC "\n",
+ LOOP_VINFO_INT_NITERS (loop_vinfo));
+ }
HOST_WIDE_INT max_niter
= likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
+ && (LOOP_VINFO_INT_NITERS (loop_vinfo) < assumed_vf))
|| (max_niter != -1
- && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
+ && (unsigned HOST_WIDE_INT) max_niter < assumed_vf))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2057,10 +2070,10 @@ start_over:
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
&& LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
- if (wi::to_widest (scalar_niters) < vf)
+ if (known_lt (wi::to_widest (scalar_niters), vf))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -2088,7 +2101,7 @@ start_over:
}
min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
- * vectorization_factor);
+ * assumed_vf);
/* Use the cost model only if it is more conservative than user specified
threshold. */
@@ -2133,26 +2146,27 @@ start_over:
/* Decide whether we need to create an epilogue loop to handle
remaining scalar iterations. */
- th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo)
- / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- * LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+ unsigned HOST_WIDE_INT const_vf;
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
- if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
- - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
- < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
}
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
- || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- /* In case of versioning, check if the maximum number of
- iterations is greater than th. If they are identical,
- the epilogue is unnecessary. */
+ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
+ || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned) exact_log2 (const_vf))
+ /* In case of versioning, check if the maximum number of
+ iterations is greater than th. If they are identical,
+ the epilogue is unnecessary. */
&& (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
- || (unsigned HOST_WIDE_INT) max_niter > th)))
+ || ((unsigned HOST_WIDE_INT) max_niter
+ > (th / const_vf) * const_vf))))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
/* If an epilogue loop is required make sure we can create one. */
@@ -2201,8 +2215,8 @@ start_over:
LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
}
- gcc_assert (vectorization_factor
- == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ gcc_assert (known_eq (vectorization_factor,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
/* Ok to vectorize! */
return true;
@@ -3283,11 +3297,11 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
stmt_vector_for_cost *epilogue_cost_vec)
{
int retval = 0;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
- *peel_iters_epilogue = vf/2;
+ *peel_iters_epilogue = assumed_vf / 2;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"cost model: epilogue peel iters set to vf/2 "
@@ -3305,11 +3319,11 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
peel_iters_prologue = niters < peel_iters_prologue ?
niters : peel_iters_prologue;
- *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
/* If we need to peel for gaps, but no peeling is required, we have to
peel VF iterations. */
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
- *peel_iters_epilogue = vf;
+ *peel_iters_epilogue = assumed_vf;
}
stmt_info_for_cost *si;
@@ -3367,7 +3381,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
unsigned vec_epilogue_cost = 0;
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
@@ -3446,13 +3460,13 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (npeel < 0)
{
- peel_iters_prologue = vf/2;
+ peel_iters_prologue = assumed_vf / 2;
dump_printf (MSG_NOTE, "cost model: "
"prologue peel iters set to vf/2.\n");
/* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */
- peel_iters_epilogue = vf/2;
+ peel_iters_epilogue = assumed_vf / 2;
dump_printf (MSG_NOTE, "cost model: "
"epilogue peel iters set to vf/2 because "
"peeling for alignment is unknown.\n");
@@ -3631,22 +3645,24 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
SOC = scalar outside cost for run time cost model check. */
- if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
+ if ((scalar_single_iter_cost * assumed_vf) > (int) vec_inside_cost)
{
if (vec_outside_cost <= 0)
min_profitable_iters = 0;
else
{
- min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
+ min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
+ * assumed_vf
- vec_inside_cost * peel_iters_prologue
- - vec_inside_cost * peel_iters_epilogue)
- / ((scalar_single_iter_cost * vf)
- - vec_inside_cost);
-
- if ((scalar_single_iter_cost * vf * min_profitable_iters)
- <= (((int) vec_inside_cost * min_profitable_iters)
- + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
- min_profitable_iters++;
+ - vec_inside_cost * peel_iters_epilogue)
+ / ((scalar_single_iter_cost * assumed_vf)
+ - vec_inside_cost);
+
+ if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
+ <= (((int) vec_inside_cost * min_profitable_iters)
+ + (((int) vec_outside_cost - scalar_outside_cost)
+ * assumed_vf)))
+ min_profitable_iters++;
}
}
/* vector version will never be profitable. */
@@ -3662,7 +3678,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
"divided by the scalar iteration cost = %d "
"is greater or equal to the vectorization factor = %d"
".\n",
- vec_inside_cost, scalar_single_iter_cost, vf);
+ vec_inside_cost, scalar_single_iter_cost, assumed_vf);
*ret_min_profitable_niters = -1;
*ret_min_profitable_estimate = -1;
return;
@@ -3673,8 +3689,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
min_profitable_iters);
/* We want the vectorized loop to execute at least once. */
- if (min_profitable_iters < (vf + peel_iters_prologue))
- min_profitable_iters = vf + peel_iters_prologue;
+ if (min_profitable_iters < (assumed_vf + peel_iters_prologue))
+ min_profitable_iters = assumed_vf + peel_iters_prologue;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -3694,10 +3710,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
min_profitable_estimate = 0;
else
{
- min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
+ min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
+ * assumed_vf
- vec_inside_cost * peel_iters_prologue
- vec_inside_cost * peel_iters_epilogue)
- / ((scalar_single_iter_cost * vf)
+ / ((scalar_single_iter_cost * assumed_vf)
- vec_inside_cost);
}
min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
@@ -5742,9 +5759,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (slp_node)
/* The size vect_schedule_slp_instance computes is off for us. */
- vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- * SLP_TREE_SCALAR_STMTS (slp_node).length ())
- / TYPE_VECTOR_SUBPARTS (vectype_in));
+ vec_num = vect_get_num_vectors
+ (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ * SLP_TREE_SCALAR_STMTS (slp_node).length (),
+ vectype_in);
else
vec_num = 1;
@@ -6540,7 +6558,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
For a loop where we could vectorize the operation indicated by CODE,
return the minimum vectorization factor that makes it worthwhile
to use generic vectors. */
-int
+static unsigned int
vect_min_worthwhile_factor (enum tree_code code)
{
switch (code)
@@ -6569,9 +6587,10 @@ bool
vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+ unsigned HOST_WIDE_INT value;
return (loop_vinfo
- && (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- >= vect_min_worthwhile_factor (code)));
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
+ && value >= vect_min_worthwhile_factor (code));
}
/* Function vectorizable_induction
@@ -6601,7 +6620,7 @@ vectorizable_induction (gimple *phi,
gphi *induction_phi;
tree induc_def, vec_dest;
tree init_expr, step_expr;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned i;
tree expr;
gimple_seq stmts;
@@ -7337,7 +7356,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree niters_vector = NULL_TREE;
tree step_vector = NULL_TREE;
tree niters_vector_mult_vf = NULL_TREE;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned int lowest_vf = constant_lower_bound (vf);
bool grouped_store;
bool slp_scheduled = false;
gimple *stmt, *pattern_stmt;
@@ -7345,7 +7365,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
gimple_stmt_iterator pattern_def_si = gsi_none ();
bool transform_pattern_stmt = false;
bool check_profitability = false;
- int th;
+ unsigned int th;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
@@ -7353,10 +7373,10 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Use the more conservative vectorization threshold. If the number
of iterations is constant assume the cost check has been performed
by our caller. If the threshold makes all loops profitable that
- run at least the vectorization factor number of times checking
- is pointless, too. */
+ run at least the (estimated) vectorization factor number of times
+ checking is pointless, too. */
th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ if (th >= vect_vf_for_cost (loop_vinfo)
&& !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
@@ -7420,11 +7440,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
check_profitability, niters_no_overflow);
if (niters_vector == NULL_TREE)
{
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && known_eq (lowest_vf, vf))
{
niters_vector
= build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
- LOOP_VINFO_INT_NITERS (loop_vinfo) / vf);
+ LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf);
step_vector = build_one_cst (TREE_TYPE (niters));
}
else
@@ -7471,8 +7491,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
continue;
if (STMT_VINFO_VECTYPE (stmt_info)
- && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
- != (unsigned HOST_WIDE_INT) vf)
+ && (maybe_ne
+ (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), vf))
&& dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
@@ -7608,7 +7628,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
= (unsigned int)
TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
if (!STMT_SLP_TYPE (stmt_info)
- && nunits != (unsigned int) vf
+ && maybe_ne (nunits, vf)
&& dump_enabled_p ())
/* For SLP VF is set according to unrolling factor, and not
to vector size, hence for SLP this print is not valid. */
@@ -7688,7 +7708,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
niters_vector_mult_vf,
!niters_no_overflow);
- scale_profile_for_vect_loop (loop, vf);
+ unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+ scale_profile_for_vect_loop (loop, assumed_vf);
/* The minimum number of iterations performed by the epilogue. This
is 1 when peeling for gaps because we always need a final scalar
@@ -7702,13 +7723,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
back to latch counts. */
if (loop->any_upper_bound)
loop->nb_iterations_upper_bound
- = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1;
+ = wi::udiv_floor (loop->nb_iterations_upper_bound + bias,
+ lowest_vf) - 1;
if (loop->any_likely_upper_bound)
loop->nb_iterations_likely_upper_bound
- = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1;
+ = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias,
+ lowest_vf) - 1;
if (loop->any_estimate)
loop->nb_iterations_estimate
- = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1;
+ = wi::udiv_floor (loop->nb_iterations_estimate + bias,
+ assumed_vf) - 1;
if (dump_enabled_p ())
{
@@ -7752,17 +7776,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
else if (!vector_sizes)
epilogue = NULL;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
+ && known_eq (vf, lowest_vf))
{
int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
int ratio = current_vector_size / smallest_vec_size;
- int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
+ unsigned HOST_WIDE_INT eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- eiters = eiters % vf;
+ eiters = eiters % lowest_vf;
epilogue->nb_iterations_upper_bound = eiters - 1;
- if (eiters < vf / ratio)
+ if (eiters < lowest_vf / ratio)
epilogue = NULL;
}
}