path: root/gcc/tree-vect-loop.cc
Diffstat (limited to 'gcc/tree-vect-loop.cc')
 gcc/tree-vect-loop.cc | 1869 ++++++++++++-----------------
 1 file changed, 638 insertions(+), 1231 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 3de1ea6..575987e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1069,10 +1069,12 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
using_decrementing_iv_p (false),
using_select_vl_p (false),
epil_using_partial_vectors_p (false),
+ allow_mutual_alignment (false),
partial_load_store_bias (0),
peeling_for_gaps (false),
peeling_for_niter (false),
early_breaks (false),
+ user_unroll (false),
no_data_dependencies (false),
has_mask_store (false),
scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -3428,27 +3430,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
res ? "succeeded" : "failed",
GET_MODE_NAME (loop_vinfo->vector_mode));
- if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1)
+ auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+ if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ /* Check to see if the user wants to unroll or if the target wants to. */
+ && (suggested_unroll_factor > 1 || user_unroll > 1))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ if (suggested_unroll_factor == 1)
+ {
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
+ suggested_unroll_factor = user_unroll / assumed_vf;
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "setting unroll factor to %d based on user requested "
+ "unroll factor %d and suggested vectorization "
+ "factor: %d\n",
+ suggested_unroll_factor, user_unroll, assumed_vf);
+ }
+ }
+
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"***** Re-trying analysis for unrolling"
" with unroll factor %d and slp %s.\n",
suggested_unroll_factor,
slp_done_for_suggested_uf ? "on" : "off");
- loop_vec_info unroll_vinfo
- = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
- unroll_vinfo->vector_mode = vector_mode;
- unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
- opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
- slp_done_for_suggested_uf);
- if (new_res)
- {
- delete loop_vinfo;
- loop_vinfo = unroll_vinfo;
- }
- else
- delete unroll_vinfo;
+ loop_vec_info unroll_vinfo
+ = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
+ unroll_vinfo->vector_mode = vector_mode;
+ unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
+ opt_result new_res
+ = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
+ slp_done_for_suggested_uf);
+ if (new_res)
+ {
+ delete loop_vinfo;
+ loop_vinfo = unroll_vinfo;
+ LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1;
+ }
+ else
+ delete unroll_vinfo;
+ }
}
/* Remember the autodetected vector mode. */
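A minimal sketch of how the new user-requested unroll handling above composes with the vectorization factor. The helper below is illustrative only (plain ints instead of the loop_vinfo fields used in the hunk): with "#pragma GCC unroll 8" on the scalar loop and an assumed vectorization factor of 4, analysis is re-tried with a suggested unroll factor of 8 / 4 = 2, so one unrolled vector iteration covers the eight requested scalar iterations.

/* Illustrative only, not part of the patch.  */
static int
suggested_unroll_from_user (int user_unroll, int assumed_vf)
{
  int suggested = user_unroll / assumed_vf;
  return suggested > 1 ? suggested : 1;
}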
@@ -3469,13 +3494,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
mode_i += 1;
}
if (mode_i + 1 < vector_modes.length ()
- && VECTOR_MODE_P (autodetected_vector_mode)
- && (related_vector_mode (vector_modes[mode_i + 1],
- GET_MODE_INNER (autodetected_vector_mode))
- == autodetected_vector_mode)
- && (related_vector_mode (autodetected_vector_mode,
- GET_MODE_INNER (vector_modes[mode_i + 1]))
- == vector_modes[mode_i + 1]))
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i + 1]))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -3676,8 +3696,15 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
vector_modes[0] = autodetected_vector_mode;
mode_i = 0;
- bool supports_partial_vectors =
- partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
+ bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
+ machine_mode mask_mode;
+ if (supports_partial_vectors
+ && !partial_vectors_supported_p ()
+ && !(VECTOR_MODE_P (first_loop_vinfo->vector_mode)
+ && targetm.vectorize.get_mask_mode
+ (first_loop_vinfo->vector_mode).exists (&mask_mode)
+ && SCALAR_INT_MODE_P (mask_mode)))
+ supports_partial_vectors = false;
poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
loop_vec_info orig_loop_vinfo = first_loop_vinfo;
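A sketch of the epilogue gating introduced above. The helper name is invented, but the predicate mirrors the hunk: masked epilogues remain a candidate when the target lacks generic partial-vector support but provides a scalar integer mask mode (AVX512-style masks) for the chosen vector mode.

/* Illustrative only; assumes the same target hooks used in the hunk.  */
static bool
masked_epilogue_possible_p (machine_mode vector_mode)
{
  if (param_vect_partial_vector_usage == 0)
    return false;
  if (partial_vectors_supported_p ())
    return true;
  machine_mode mask_mode;
  return (VECTOR_MODE_P (vector_mode)
          && targetm.vectorize.get_mask_mode (vector_mode).exists (&mask_mode)
          && SCALAR_INT_MODE_P (mask_mode));
}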
@@ -3697,6 +3724,22 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
break;
continue;
}
+ /* We would need an exhaustive search to find all modes we
+ skipped, but that would lead to the same result as the
+ analysis the mode was skipped for, where we could check
+ cached_vf_per_mode against.
+ Check for the autodetected mode, which is the common
+ situation on x86 which does not perform cost comparison. */
+ if (!supports_partial_vectors
+ && maybe_ge (cached_vf_per_mode[0], first_vinfo_vf)
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i]))
+ {
+ mode_i++;
+ if (mode_i == vector_modes.length ())
+ break;
+ continue;
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -4101,6 +4144,10 @@ pop:
if (op.ops[2] == op.ops[opi])
neg = ! neg;
}
+ /* For an FMA the reduction code is the PLUS if the addition chain
+ is the reduction. */
+ else if (op.code == IFN_FMA && opi == 2)
+ op.code = PLUS_EXPR;
if (CONVERT_EXPR_CODE_P (op.code)
&& tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
;
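The FMA special case above in scalar form. The kernel below is only an example of the shape being detected: the reduction value flows through operand 2 of the FMA (the addend), so the reduction operation is treated as PLUS_EXPR.

/* Illustrative reduction carried through an FMA addend.  */
double
dot_product (const double *a, const double *b, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    sum = __builtin_fma (a[i], b[i], sum);  /* sum is the FMA addend.  */
  return sum;
}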
@@ -4646,7 +4693,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
TODO: Consider assigning different costs to different scalar
statements. */
- scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
+ scalar_single_iter_cost = (loop_vinfo->scalar_costs->total_cost ()
+ * param_vect_scalar_cost_multiplier) / 100;
/* Add additional cost for the peeled instructions in prologue and epilogue
loop. (For fully-masked loops there will be no peeling.)
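A sketch of the new scalar-cost scaling. The helper is hypothetical, but the arithmetic matches the hunk: the parameter (presumably exposed as --param vect-scalar-cost-multiplier) is a percentage, so 100 is neutral, while larger values make the scalar loop look more expensive and bias the profitability check towards vectorization.

/* Illustrative only.  */
static unsigned
scaled_scalar_iter_cost (unsigned scalar_cost, unsigned multiplier_percent)
{
  return (scalar_cost * multiplier_percent) / 100;
}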
@@ -5470,76 +5518,6 @@ vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo,
}
}
-/* Function get_initial_def_for_reduction
-
- Input:
- REDUC_INFO - the info_for_reduction
- INIT_VAL - the initial value of the reduction variable
- NEUTRAL_OP - a value that has no effect on the reduction, as per
- neutral_op_for_reduction
-
- Output:
- Return a vector variable, initialized according to the operation that
- STMT_VINFO performs. This vector will be used as the initial value
- of the vector of partial results.
-
- The value we need is a vector in which element 0 has value INIT_VAL
- and every other element has value NEUTRAL_OP. */
-
-static tree
-get_initial_def_for_reduction (loop_vec_info loop_vinfo,
- stmt_vec_info reduc_info,
- tree init_val, tree neutral_op)
-{
- class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- tree scalar_type = TREE_TYPE (init_val);
- tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
- tree init_def;
- gimple_seq stmts = NULL;
-
- gcc_assert (vectype);
-
- gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
- || SCALAR_FLOAT_TYPE_P (scalar_type));
-
- gcc_assert (nested_in_vect_loop_p (loop, reduc_info)
- || loop == (gimple_bb (reduc_info->stmt))->loop_father);
-
- if (operand_equal_p (init_val, neutral_op))
- {
- /* If both elements are equal then the vector described above is
- just a splat. */
- neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
- init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op);
- }
- else
- {
- neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op);
- init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
- {
- /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into
- element 0. */
- init_def = gimple_build_vector_from_val (&stmts, vectype,
- neutral_op);
- init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
- vectype, init_def, init_val);
- }
- else
- {
- /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */
- tree_vector_builder elts (vectype, 1, 2);
- elts.quick_push (init_val);
- elts.quick_push (neutral_op);
- init_def = gimple_build_vector (&stmts, &elts);
- }
- }
-
- if (stmts)
- vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts);
- return init_def;
-}
-
/* Get at the initial defs for the reduction PHIs for REDUC_INFO,
which performs a reduction involving GROUP_SIZE scalar statements.
NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP
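For reference, a plain-C sketch of the vector the removed get_initial_def_for_reduction built; the helper below is illustrative only. For an add reduction with initial value 5 and four lanes it yields {5, 0, 0, 0}: lane 0 carries the initial value, the remaining lanes the neutral value, so the lane-wise partial results still reduce to the scalar answer.

/* Illustrative only: element 0 gets INIT_VAL, other lanes NEUTRAL_OP.  */
static void
build_initial_reduction_vec (double *vec, int nunits,
                             double init_val, double neutral_op)
{
  vec[0] = init_val;
  for (int i = 1; i < nunits; i++)
    vec[i] = neutral_op;
}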
@@ -5951,7 +5929,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
loop-closed PHI of the inner loop which we remember as
def for the reduction PHI generation. */
bool double_reduc = false;
- stmt_vec_info rdef_info = stmt_info;
if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
{
double_reduc = true;
@@ -5981,20 +5958,18 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
auto_vec<tree> reduc_inputs;
int j, i;
vec<tree> &scalar_results = reduc_info->reduc_scalar_results;
- unsigned int group_size = 1, k;
+ unsigned int k;
/* SLP reduction without reduction chain, e.g.,
# a1 = phi <a2, a0>
# b1 = phi <b2, b0>
a2 = operation (a1)
b2 = operation (b1) */
bool slp_reduc
- = (slp_node
- && !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)));
+ = !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info));
bool direct_slp_reduc;
tree induction_index = NULL_TREE;
- if (slp_node)
- group_size = SLP_TREE_LANES (slp_node);
+ unsigned int group_size = SLP_TREE_LANES (slp_node);
if (nested_in_vect_loop_p (loop, stmt_info))
{
@@ -6024,18 +5999,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* All statements produce live-out values. */
live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
- unsigned vec_num;
- int ncopies;
- if (slp_node)
- {
- vec_num = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length ();
- ncopies = 1;
- }
- else
- {
- vec_num = 1;
- ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length ();
- }
+ unsigned vec_num
+ = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length ();
/* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
which is updated with the current index of the loop for every match of
@@ -6050,53 +6015,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
{
auto_vec<std::pair<tree, bool>, 2> ccompares;
- if (slp_node)
+ slp_tree cond_node = slp_node_instance->root;
+ while (cond_node != slp_node_instance->reduc_phis)
{
- slp_tree cond_node = slp_node_instance->root;
- while (cond_node != slp_node_instance->reduc_phis)
+ stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
+ int slp_reduc_idx;
+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
{
- stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
- int slp_reduc_idx;
- if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
- {
- gimple *vec_stmt
- = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (cond_node)[0]);
- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
- ccompares.safe_push
- (std::make_pair (gimple_assign_rhs1 (vec_stmt),
- STMT_VINFO_REDUC_IDX (cond_info) == 2));
- /* ??? We probably want to have REDUC_IDX on the SLP node?
- We have both three and four children COND_EXPR nodes
- dependent on whether the comparison is still embedded
- as GENERIC. So work backwards. */
- slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3
- + STMT_VINFO_REDUC_IDX (cond_info));
- }
- else
- slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info);
- cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx];
- }
- }
- else
- {
- stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
- cond_info = vect_stmt_to_vectorize (cond_info);
- while (cond_info != reduc_info)
- {
- if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
- {
- gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0];
- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
- ccompares.safe_push
- (std::make_pair (gimple_assign_rhs1 (vec_stmt),
- STMT_VINFO_REDUC_IDX (cond_info) == 2));
- }
- cond_info
- = loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
- 1 + STMT_VINFO_REDUC_IDX
- (cond_info)));
- cond_info = vect_stmt_to_vectorize (cond_info);
+ gimple *vec_stmt
+ = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (cond_node)[0]);
+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
+ ccompares.safe_push
+ (std::make_pair (gimple_assign_rhs1 (vec_stmt),
+ STMT_VINFO_REDUC_IDX (cond_info) == 2));
+ /* ??? We probably want to have REDUC_IDX on the SLP node?
+ We have both three and four children COND_EXPR nodes
+ dependent on whether the comparison is still embedded
+ as GENERIC. So work backwards. */
+ slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3
+ + STMT_VINFO_REDUC_IDX (cond_info));
}
+ else
+ slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info);
+ cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx];
}
gcc_assert (ccompares.length () != 0);
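A scalar example of the COND_REDUCTION shape handled here; the loop is illustrative only. The result is the value stored by the last iteration whose condition held, which is why the code below this hunk materializes an induction index and later selects the lane with the maximum index.

/* Illustrative COND_REDUCTION: last b[i] for which a[i] > 0.  */
int
last_match (const int *a, const int *b, int n)
{
  int last = -1;
  for (int i = 0; i < n; i++)
    if (a[i] > 0)
      last = b[i];
  return last;
}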
@@ -6123,7 +6064,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* Create an induction variable. */
gimple_stmt_iterator incr_gsi;
bool insert_after;
- vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after);
+ vect_iv_increment_position (LOOP_VINFO_IV_EXIT (loop_vinfo),
+ &incr_gsi, &insert_after);
create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi,
insert_after, &indx_before_incr, &indx_after_incr);
@@ -6205,30 +6147,22 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* We need to reduce values in all exits. */
exit_bb = loop_exit->dest;
exit_gsi = gsi_after_labels (exit_bb);
- reduc_inputs.create (slp_node ? vec_num : ncopies);
+ reduc_inputs.create (vec_num);
for (unsigned i = 0; i < vec_num; i++)
{
gimple_seq stmts = NULL;
- if (slp_node)
- def = vect_get_slp_vect_def (slp_node, i);
+ def = vect_get_slp_vect_def (slp_node, i);
+ tree new_def = copy_ssa_name (def);
+ phi = create_phi_node (new_def, exit_bb);
+ if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit)
+ SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def);
else
- def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]);
- for (j = 0; j < ncopies; j++)
- {
- tree new_def = copy_ssa_name (def);
- phi = create_phi_node (new_def, exit_bb);
- if (j)
- def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]);
- if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit)
- SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def);
- else
- {
- for (unsigned k = 0; k < gimple_phi_num_args (phi); k++)
- SET_PHI_ARG_DEF (phi, k, def);
- }
- new_def = gimple_convert (&stmts, vectype, new_def);
- reduc_inputs.quick_push (new_def);
+ {
+ for (unsigned k = 0; k < gimple_phi_num_args (phi); k++)
+ SET_PHI_ARG_DEF (phi, k, def);
}
+ new_def = gimple_convert (&stmts, vectype, new_def);
+ reduc_inputs.quick_push (new_def);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
}
@@ -6284,8 +6218,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))
|| direct_slp_reduc
|| (slp_reduc
- && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size))
- || ncopies > 1)
+ && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)))
{
gimple_seq stmts = NULL;
tree single_input = reduc_inputs[0];
@@ -6579,7 +6512,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
is itself a neutral value. */
tree vector_identity = NULL_TREE;
tree neutral_op = NULL_TREE;
- if (slp_node)
+ if (1)
{
tree initial_value = NULL_TREE;
if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)))
@@ -7088,10 +7021,10 @@ static bool
vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node,
+ slp_tree slp_node,
gimple *reduc_def_stmt,
code_helper code, internal_fn reduc_fn,
- tree *ops, int num_ops, tree vectype_in,
+ int num_ops, tree vectype_in,
int reduc_index, vec_loop_masks *masks,
vec_loop_lens *lens)
{
@@ -7099,14 +7032,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in);
- int ncopies;
- if (slp_node)
- ncopies = 1;
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
-
gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
- gcc_assert (ncopies == 1);
bool is_cond_op = false;
if (!code.is_tree_code ())
@@ -7118,9 +7044,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op);
- if (slp_node)
- gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
- TYPE_VECTOR_SUBPARTS (vectype_in)));
+ gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
+ TYPE_VECTOR_SUBPARTS (vectype_in)));
/* The operands either come from a binary operation or an IFN_COND operation.
The former is a gimple assign with binary rhs and the latter is a
@@ -7130,36 +7055,14 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
int group_size = 1;
stmt_vec_info scalar_dest_def_info;
auto_vec<tree> vec_oprnds0, vec_opmask;
- if (slp_node)
- {
- vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
- + (1 - reduc_index)],
- &vec_oprnds0);
- group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
- scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
- /* For an IFN_COND_OP we also need the vector mask operand. */
- if (is_cond_op)
- vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
- }
- else
- {
- tree op0, opmask;
- if (!is_cond_op)
- op0 = ops[1 - reduc_index];
- else
- {
- op0 = ops[2 + (1 - reduc_index)];
- opmask = ops[0];
- }
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
- op0, &vec_oprnds0);
- scalar_dest_def_info = stmt_info;
-
- /* For an IFN_COND_OP we also need the vector mask operand. */
- if (is_cond_op)
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
- opmask, &vec_opmask);
- }
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
+ + (1 - reduc_index)],
+ &vec_oprnds0);
+ group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
+ scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+ /* For an IFN_COND_OP we also need the vector mask operand. */
+ if (is_cond_op)
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
tree scalar_dest = gimple_get_lhs (sdef);
@@ -7167,7 +7070,6 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
tree reduc_var = gimple_phi_result (reduc_def_stmt);
int vec_num = vec_oprnds0.length ();
- gcc_assert (vec_num == 1 || slp_node);
tree vec_elem_type = TREE_TYPE (vectype_out);
gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type));
@@ -7279,13 +7181,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
scalar_dest_def_info,
new_stmt, gsi);
- if (slp_node)
- slp_node->push_vec_def (new_stmt);
- else
- {
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
- *vec_stmt = new_stmt;
- }
+ slp_node->push_vec_def (new_stmt);
}
return true;
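A scalar example of the in-order (fold-left) reductions this function now handles through SLP only; the kernel is illustrative. Without reassociation (no -fassociative-math) a floating-point sum must be accumulated in source order, so each vector operand is folded into the scalar accumulator with a single .FOLD_LEFT_PLUS (or its masked variant) rather than a tree of partial sums.

/* Illustrative in-order reduction: FP addition kept in source order.  */
float
strict_fsum (const float *a, int n)
{
  float s = 0.0f;
  for (int i = 0; i < n; i++)
    s += a[i];
  return s;
}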
@@ -7543,7 +7439,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
return false;
}
- if (slp_node && !vect_maybe_update_slp_op_vectype (slp_op, vectype))
+ if (!vect_maybe_update_slp_op_vectype (slp_op, vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7682,23 +7578,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
{
if (is_a <gphi *> (stmt_info->stmt))
{
- if (slp_node)
- {
- /* We eventually need to set a vector type on invariant
- arguments. */
- unsigned j;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
- if (!vect_maybe_update_slp_op_vectype
- (child, SLP_TREE_VECTYPE (slp_node)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "incompatible vector types for "
- "invariants\n");
- return false;
- }
- }
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
/* Analysis for double-reduction is done on the outer
loop PHI, nested cycles have no further restrictions. */
STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
@@ -7723,7 +7616,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
the outer loop PHI. The latter is what we want to analyze
the reduction with. The LC PHI is handled by
vectorizable_lc_phi. */
- gcc_assert (slp_node);
return gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) == 2;
}
use_operand_p use_p;
@@ -7734,13 +7626,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
phi_info = loop_vinfo->lookup_stmt (use_stmt);
}
- if (slp_node)
- {
- slp_node_instance->reduc_phis = slp_node;
- /* ??? We're leaving slp_node to point to the PHIs, we only
- need it to get at the number of vector stmts which wasn't
- yet initialized for the instance root. */
- }
+ slp_node_instance->reduc_phis = slp_node;
+ /* ??? We're leaving slp_node to point to the PHIs, we only
+ need it to get at the number of vector stmts which wasn't
+ yet initialized for the instance root. */
/* PHIs should not participate in patterns. */
gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
@@ -7756,11 +7645,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
unsigned reduc_chain_length = 0;
bool only_slp_reduc_chain = true;
stmt_info = NULL;
- slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL;
+ slp_tree slp_for_stmt_info = slp_node_instance->root;
/* For double-reductions we start SLP analysis at the inner loop LC PHI
which is the def of the outer loop live stmt. */
- if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def
- && slp_node)
+ if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def)
slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
while (reduc_def != PHI_RESULT (reduc_def_phi))
{
@@ -7863,7 +7751,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
reduc_def = op.ops[reduc_idx];
reduc_chain_length++;
- if (!stmt_info && slp_node)
+ if (!stmt_info)
slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0];
}
/* PHIs should not participate in patterns. */
@@ -7877,14 +7765,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
element. */
- if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
{
gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
}
if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
- gcc_assert (slp_node
- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
+ gcc_assert (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
/* 1. Is vectorizable reduction? */
/* Not supportable if the reduction variable is used in the loop, unless
@@ -7939,7 +7826,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
OK to use them in a reduction chain or when the reduction group
has just one element. */
if (lane_reducing
- && slp_node
&& !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
&& SLP_TREE_LANES (slp_node) > 1)
{
@@ -8017,7 +7903,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* If we have a condition reduction, see if we can simplify it further. */
if (reduction_type == COND_REDUCTION)
{
- if (slp_node && SLP_TREE_LANES (slp_node) != 1)
+ if (SLP_TREE_LANES (slp_node) != 1)
return false;
/* When the condition uses the reduction value in the condition, fail. */
@@ -8119,10 +8005,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (STMT_VINFO_LIVE_P (phi_info))
return false;
- if (slp_node)
- ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
gcc_assert (ncopies >= 1);
@@ -8194,8 +8077,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
outer-loop vectorization is safe. Likewise when we are vectorizing
a series of reductions using SLP and the VF is one the reductions
are performed in scalar order. */
- if (slp_node
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info)
&& known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u))
;
else if (needs_fold_left_reduction_p (op.type, orig_code))
@@ -8210,6 +8092,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"in-order reduction chain without SLP.\n");
return false;
}
+ /* Code generation doesn't support function calls other
+ than .COND_*. */
+ if (!op.code.is_tree_code ()
+ && !(op.code.is_internal_fn ()
+ && conditional_internal_fn_code (internal_fn (op.code))
+ != ERROR_MARK))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "in-order reduction chain operation not "
+ "supported.\n");
+ return false;
+ }
STMT_VINFO_REDUC_TYPE (reduc_info)
= reduction_type = FOLD_LEFT_REDUCTION;
}
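A sketch of the constraint added above. The helper name is invented, but the predicate matches the hunk: an in-order reduction statement must use either a tree code or one of the conditional internal functions (.COND_ADD and friends); any other call is rejected because fold-left code generation does not support it.

/* Illustrative only.  */
static bool
fold_left_code_supported_p (code_helper code)
{
  if (code.is_tree_code ())
    return true;
  return (code.is_internal_fn ()
          && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK);
}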
@@ -8227,7 +8122,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
|| reduction_type == INTEGER_INDUC_COND_REDUCTION
|| reduction_type == CONST_COND_REDUCTION
|| reduction_type == EXTRACT_LAST_REDUCTION)
- && slp_node
+ && 1
&& ncopies > 1)
{
if (dump_enabled_p ())
@@ -8236,17 +8131,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
- if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
- && !slp_node
- && ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in double reduction or condition "
- "reduction or fold-left reduction.\n");
- return false;
- }
-
internal_fn reduc_fn = IFN_LAST;
if (reduction_type == TREE_CODE_REDUCTION
|| reduction_type == FOLD_LEFT_REDUCTION
@@ -8308,14 +8192,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* For SLP reductions, see if there is a neutral value we can use. */
tree neutral_op = NULL_TREE;
- if (slp_node)
- {
- tree initial_value = NULL_TREE;
- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL)
- initial_value = vect_phi_initial_value (reduc_def_phi);
- neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
- orig_code, initial_value);
- }
+ tree initial_value = NULL_TREE;
+ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL)
+ initial_value = vect_phi_initial_value (reduc_def_phi);
+ neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out),
+ orig_code, initial_value);
if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
{
@@ -8341,7 +8222,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
if (reduction_type == FOLD_LEFT_REDUCTION
- && (slp_node && SLP_TREE_LANES (slp_node) > 1)
+ && SLP_TREE_LANES (slp_node) > 1
&& !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
{
/* We cannot use in-order reductions in this case because there is
@@ -8370,8 +8251,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
}
/* Check extra constraints for variable-length unchained SLP reductions. */
- if (slp_node
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info)
&& !nunits_out.is_constant ())
{
/* We checked above that we could build the initial vector when
@@ -8465,9 +8345,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
own reduction accumulator since one of the main goals of unrolling a
reduction is to reduce the aggregate loop-carried latency. */
if (ncopies > 1
- && (!slp_node
- || (!REDUC_GROUP_FIRST_ELEMENT (stmt_info)
- && SLP_TREE_LANES (slp_node) == 1))
+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && SLP_TREE_LANES (slp_node) == 1
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
&& reduc_chain_length == 1
&& loop_vinfo->suggested_unroll_factor == 1)
@@ -8518,8 +8397,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
reduction. */
single_defuse_cycle &= !lane_reducing;
- if (slp_node
- && (single_defuse_cycle || reduction_type == FOLD_LEFT_REDUCTION))
+ if (single_defuse_cycle || reduction_type == FOLD_LEFT_REDUCTION)
for (i = 0; i < (int) op.num_ops; i++)
if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i]))
{
@@ -8549,7 +8427,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
{
stmt_vec_info tem
= vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
- if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
+ if (REDUC_GROUP_FIRST_ELEMENT (tem))
{
gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
tem = REDUC_GROUP_FIRST_ELEMENT (tem);
@@ -8645,11 +8523,10 @@ vect_emulate_mixed_dot_prod (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
bool
vect_transform_reduction (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- gimple **vec_stmt, slp_tree slp_node)
+ slp_tree slp_node)
{
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- unsigned ncopies;
unsigned vec_num;
stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
@@ -8677,16 +8554,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
if (!vectype_in)
vectype_in = STMT_VINFO_VECTYPE (stmt_info);
- if (slp_node)
- {
- ncopies = 1;
- vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in);
- }
- else
- {
- ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
- vec_num = 1;
- }
+ vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in);
code_helper code = canonicalize_code (op.code, op.type);
internal_fn cond_fn = get_conditional_internal_fn (code, op.type);
@@ -8702,10 +8570,6 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
- /* FORNOW: Multiple types are not supported for condition. */
- if (code == COND_EXPR)
- gcc_assert (ncopies == 1);
-
/* A binary COND_OP reduction must have the same definition and else
value. */
bool cond_fn_p = code.is_internal_fn ()
@@ -8729,8 +8593,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
gcc_assert (code.is_tree_code () || cond_fn_p);
return vectorize_fold_left_reduction
- (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi,
- code, reduc_fn, op.ops, op.num_ops, vectype_in,
+ (loop_vinfo, stmt_info, gsi, slp_node, reduc_def_phi,
+ code, reduc_fn, op.num_ops, vectype_in,
reduc_index, masks, lens);
}
@@ -8748,33 +8612,12 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- if (lane_reducing && !slp_node && !single_defuse_cycle)
- {
- /* Note: there are still vectorizable cases that can not be handled by
- single-lane slp. Probably it would take some time to evolve the
- feature to a mature state. So we have to keep the below non-slp code
- path as failsafe for lane-reducing support. */
- gcc_assert (op.num_ops <= 3);
- for (unsigned i = 0; i < op.num_ops; i++)
- {
- unsigned oprnd_ncopies = ncopies;
-
- if ((int) i == reduc_index)
- {
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- oprnd_ncopies = vect_get_num_copies (loop_vinfo, vectype);
- }
-
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, oprnd_ncopies,
- op.ops[i], &vec_oprnds[i]);
- }
- }
/* Get NCOPIES vector definitions for all operands except the reduction
definition. */
- else if (!cond_fn_p)
+ if (!cond_fn_p)
{
gcc_assert (reduc_index >= 0 && reduc_index <= 2);
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
+ vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1,
single_defuse_cycle && reduc_index == 0
? NULL_TREE : op.ops[0], &vec_oprnds[0],
single_defuse_cycle && reduc_index == 1
@@ -8789,7 +8632,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
vectype. */
gcc_assert (single_defuse_cycle
&& (reduc_index == 1 || reduc_index == 2));
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, op.ops[0],
+ vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, op.ops[0],
truth_type_for (vectype_in), &vec_oprnds[0],
reduc_index == 1 ? NULL_TREE : op.ops[1],
NULL_TREE, &vec_oprnds[1],
@@ -8945,7 +8788,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
handle over the operand to other dependent statements. */
gcc_assert (reduc_vop);
- if (slp_node && TREE_CODE (reduc_vop) == SSA_NAME
+ if (TREE_CODE (reduc_vop) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (reduc_vop))
new_stmt = SSA_NAME_DEF_STMT (reduc_vop);
else
@@ -8969,7 +8812,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
std::swap (vop[0], vop[1]);
}
tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype_in,
+ vec_num, vectype_in,
mask_index++);
gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
vop[0], vop[1], vop[0]);
@@ -8987,7 +8830,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
if (masked_loop_p && mask_by_cond_expr)
{
tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype_in,
+ vec_num, vectype_in,
mask_index++);
build_vect_cond_expr (code, vop, mask, gsi);
}
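A per-lane sketch of what the masking above achieves for an add reduction, whether via a .COND_ADD call with the accumulator as else-value or via the VEC_COND_EXPR built by build_vect_cond_expr; the scalar loop is illustrative only. Inactive lanes must leave the accumulator unchanged.

/* Illustrative lane-wise semantics of one masked add-reduction step.  */
static void
masked_add_step (double *acc, const double *val, const bool *mask, int lanes)
{
  for (int i = 0; i < lanes; i++)
    acc[i] = mask[i] ? acc[i] + val[i] : acc[i];
}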
@@ -9015,15 +8858,10 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
if (single_defuse_cycle && i < num - 1)
vec_oprnds[reduc_index].safe_push (gimple_get_lhs (new_stmt));
- else if (slp_node)
- slp_node->push_vec_def (new_stmt);
else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ slp_node->push_vec_def (new_stmt);
}
- if (!slp_node)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
-
return true;
}
@@ -9031,14 +8869,12 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
bool
vect_transform_cycle_phi (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, gimple **vec_stmt,
+ stmt_vec_info stmt_info,
slp_tree slp_node, slp_instance slp_node_instance)
{
tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
int i;
- int ncopies;
- int j;
bool nested_cycle = false;
int vec_num;
@@ -9058,25 +8894,12 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Leave the scalar phi in place. */
return true;
- if (slp_node)
- {
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- ncopies = 1;
- }
- else
- {
- vec_num = 1;
- ncopies = vect_get_num_copies (loop_vinfo,
- STMT_VINFO_VECTYPE (stmt_info));
- }
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Check whether we should use a single PHI node and accumulate
vectors to one before the backedge. */
if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
- {
- ncopies = 1;
- vec_num = 1;
- }
+ vec_num = 1;
/* Create the destination vector */
gphi *phi = as_a <gphi *> (stmt_info->stmt);
@@ -9086,158 +8909,86 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Get the loop-entry arguments. */
tree vec_initial_def = NULL_TREE;
auto_vec<tree> vec_initial_defs;
- if (slp_node)
+ vec_initial_defs.reserve (vec_num);
+ /* Optimize: if initial_def is for REDUC_MAX smaller than the base
+ and we can't use zero for induc_val, use initial_def. Similarly
+ for REDUC_MIN and initial_def larger than the base. */
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
{
- vec_initial_defs.reserve (vec_num);
- /* Optimize: if initial_def is for REDUC_MAX smaller than the base
- and we can't use zero for induc_val, use initial_def. Similarly
- for REDUC_MIN and initial_def larger than the base. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
- {
- gcc_assert (SLP_TREE_LANES (slp_node) == 1);
- tree initial_def = vect_phi_initial_value (phi);
- reduc_info->reduc_initial_values.safe_push (initial_def);
- tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
- if (TREE_CODE (initial_def) == INTEGER_CST
- && !integer_zerop (induc_val)
- && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
- && tree_int_cst_lt (initial_def, induc_val))
- || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
- && tree_int_cst_lt (induc_val, initial_def))))
- {
- induc_val = initial_def;
- /* Communicate we used the initial_def to epilouge
- generation. */
- STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
- }
- vec_initial_defs.quick_push
- (build_vector_from_val (vectype_out, induc_val));
- }
- else if (nested_cycle)
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ tree initial_def = vect_phi_initial_value (phi);
+ reduc_info->reduc_initial_values.safe_push (initial_def);
+ tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
+ if (TREE_CODE (initial_def) == INTEGER_CST
+ && !integer_zerop (induc_val)
+ && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
+ && tree_int_cst_lt (initial_def, induc_val))
+ || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
+ && tree_int_cst_lt (induc_val, initial_def))))
{
- unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
- vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
- &vec_initial_defs);
- }
- else
- {
- gcc_assert (slp_node == slp_node_instance->reduc_phis);
- vec<tree> &initial_values = reduc_info->reduc_initial_values;
- vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
-
- unsigned int num_phis = stmts.length ();
- if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
- num_phis = 1;
- initial_values.reserve (num_phis);
- for (unsigned int i = 0; i < num_phis; ++i)
- {
- gphi *this_phi = as_a<gphi *> (stmts[i]->stmt);
- initial_values.quick_push (vect_phi_initial_value (this_phi));
- }
- if (vec_num == 1)
- vect_find_reusable_accumulator (loop_vinfo, reduc_info);
- if (!initial_values.is_empty ())
- {
- tree initial_value
- = (num_phis == 1 ? initial_values[0] : NULL_TREE);
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
- tree neutral_op
- = neutral_op_for_reduction (TREE_TYPE (vectype_out),
- code, initial_value);
- /* Try to simplify the vector initialization by applying an
- adjustment after the reduction has been performed. This
- can also break a critical path but on the other hand
- requires to keep the initial value live across the loop. */
- if (neutral_op
- && initial_values.length () == 1
- && !reduc_info->reused_accumulator
- && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
- && !operand_equal_p (neutral_op, initial_values[0]))
- {
- STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
- = initial_values[0];
- initial_values[0] = neutral_op;
- }
- get_initial_defs_for_reduction (loop_vinfo, reduc_info,
- &vec_initial_defs, vec_num,
- stmts.length (), neutral_op);
- }
+ induc_val = initial_def;
+ /* Communicate we used the initial_def to epilogue
+ generation. */
+ STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
}
+ vec_initial_defs.quick_push
+ (build_vector_from_val (vectype_out, induc_val));
+ }
+ else if (nested_cycle)
+ {
+ unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
+ vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
+ &vec_initial_defs);
}
else
{
- /* Get at the scalar def before the loop, that defines the initial
- value of the reduction variable. */
- tree initial_def = vect_phi_initial_value (phi);
- reduc_info->reduc_initial_values.safe_push (initial_def);
- /* Optimize: if initial_def is for REDUC_MAX smaller than the base
- and we can't use zero for induc_val, use initial_def. Similarly
- for REDUC_MIN and initial_def larger than the base. */
- if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
- {
- tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
- if (TREE_CODE (initial_def) == INTEGER_CST
- && !integer_zerop (induc_val)
- && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
- && tree_int_cst_lt (initial_def, induc_val))
- || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
- && tree_int_cst_lt (induc_val, initial_def))))
+ gcc_assert (slp_node == slp_node_instance->reduc_phis);
+ vec<tree> &initial_values = reduc_info->reduc_initial_values;
+ vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+
+ unsigned int num_phis = stmts.length ();
+ if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
+ num_phis = 1;
+ initial_values.reserve (num_phis);
+ for (unsigned int i = 0; i < num_phis; ++i)
+ {
+ gphi *this_phi = as_a<gphi *> (stmts[i]->stmt);
+ initial_values.quick_push (vect_phi_initial_value (this_phi));
+ }
+ if (vec_num == 1)
+ vect_find_reusable_accumulator (loop_vinfo, reduc_info);
+ if (!initial_values.is_empty ())
+ {
+ tree initial_value
+ = (num_phis == 1 ? initial_values[0] : NULL_TREE);
+ code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
+ tree neutral_op
+ = neutral_op_for_reduction (TREE_TYPE (vectype_out),
+ code, initial_value);
+ /* Try to simplify the vector initialization by applying an
+ adjustment after the reduction has been performed. This
+ can also break a critical path but on the other hand
+ requires to keep the initial value live across the loop. */
+ if (neutral_op
+ && initial_values.length () == 1
+ && !reduc_info->reused_accumulator
+ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ && !operand_equal_p (neutral_op, initial_values[0]))
{
- induc_val = initial_def;
- /* Communicate we used the initial_def to epilouge
- generation. */
- STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
- }
- vec_initial_def = build_vector_from_val (vectype_out, induc_val);
- }
- else if (nested_cycle)
- {
- /* Do not use an adjustment def as that case is not supported
- correctly if ncopies is not one. */
- vect_get_vec_defs_for_operand (loop_vinfo, reduc_stmt_info,
- ncopies, initial_def,
- &vec_initial_defs);
- }
- else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION
- || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
- /* Fill the initial vector with the initial scalar value. */
- vec_initial_def
- = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info,
- initial_def, initial_def);
- else
- {
- if (ncopies == 1)
- vect_find_reusable_accumulator (loop_vinfo, reduc_info);
- if (!reduc_info->reduc_initial_values.is_empty ())
- {
- initial_def = reduc_info->reduc_initial_values[0];
- code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
- tree neutral_op
- = neutral_op_for_reduction (TREE_TYPE (initial_def),
- code, initial_def);
- gcc_assert (neutral_op);
- /* Try to simplify the vector initialization by applying an
- adjustment after the reduction has been performed. */
- if (!reduc_info->reused_accumulator
- && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
- && !operand_equal_p (neutral_op, initial_def))
- {
- STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
- = initial_def;
- initial_def = neutral_op;
- }
- vec_initial_def
- = get_initial_def_for_reduction (loop_vinfo, reduc_info,
- initial_def, neutral_op);
+ STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
+ = initial_values[0];
+ initial_values[0] = neutral_op;
}
+ get_initial_defs_for_reduction (loop_vinfo, reduc_info,
+ &vec_initial_defs, vec_num,
+ stmts.length (), neutral_op);
}
}
if (vec_initial_def)
{
- vec_initial_defs.create (ncopies);
- for (i = 0; i < ncopies; ++i)
- vec_initial_defs.quick_push (vec_initial_def);
+ vec_initial_defs.create (1);
+ vec_initial_defs.quick_push (vec_initial_def);
}
if (auto *accumulator = reduc_info->reused_accumulator)
@@ -9312,29 +9063,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
for (i = 0; i < vec_num; i++)
{
tree vec_init_def = vec_initial_defs[i];
- for (j = 0; j < ncopies; j++)
- {
- /* Create the reduction-phi that defines the reduction
- operand. */
- gphi *new_phi = create_phi_node (vec_dest, loop->header);
-
- /* Set the loop-entry arg of the reduction-phi. */
- if (j != 0 && nested_cycle)
- vec_init_def = vec_initial_defs[j];
- add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
- UNKNOWN_LOCATION);
+ /* Create the reduction-phi that defines the reduction
+ operand. */
+ gphi *new_phi = create_phi_node (vec_dest, loop->header);
+ add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
+ UNKNOWN_LOCATION);
- /* The loop-latch arg is set in epilogue processing. */
+ /* The loop-latch arg is set in epilogue processing. */
- if (slp_node)
- slp_node->push_vec_def (new_phi);
- else
- {
- if (j == 0)
- *vec_stmt = new_phi;
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi);
- }
- }
+ slp_node->push_vec_def (new_phi);
}
return true;
@@ -9344,7 +9081,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
bool
vectorizable_lc_phi (loop_vec_info loop_vinfo,
- stmt_vec_info stmt_info, gimple **vec_stmt,
+ stmt_vec_info stmt_info,
slp_tree slp_node)
{
if (!loop_vinfo
@@ -9356,22 +9093,25 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo,
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
return false;
- if (!vec_stmt) /* transformation not required. */
+ /* Deal with copies from externs or constants that disguise as
+ loop-closed PHI nodes (PR97886). */
+ if (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
+ SLP_TREE_VECTYPE (slp_node)))
{
- /* Deal with copies from externs or constants that disguise as
- loop-closed PHI nodes (PR97886). */
- if (slp_node
- && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
- SLP_TREE_VECTYPE (slp_node)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "incompatible vector types for invariants\n");
- return false;
- }
- STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
- return true;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for invariants\n");
+ return false;
}
+ STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
+ return true;
+}
+
+bool
+vect_transform_lc_phi (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
+ slp_tree slp_node)
+{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree scalar_dest = gimple_phi_result (stmt_info->stmt);
@@ -9379,21 +9119,15 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo,
edge e = single_pred_edge (bb);
tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
auto_vec<tree> vec_oprnds;
- vect_get_vec_defs (loop_vinfo, stmt_info, slp_node,
- !slp_node ? vect_get_num_copies (loop_vinfo, vectype) : 1,
+ vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1,
gimple_phi_arg_def (stmt_info->stmt, 0), &vec_oprnds);
for (unsigned i = 0; i < vec_oprnds.length (); i++)
{
/* Create the vectorized LC PHI node. */
gphi *new_phi = create_phi_node (vec_dest, bb);
add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
- if (slp_node)
- slp_node->push_vec_def (new_phi);
- else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi);
+ slp_node->push_vec_def (new_phi);
}
- if (!slp_node)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
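A scalar example where the loop-closed PHIs handled here arise; the nest is illustrative only. The inner accumulator leaves the inner loop through a single-argument loop-closed PHI; after the split above, vectorizable_lc_phi only performs the analysis and the new vect_transform_lc_phi creates the corresponding vector PHI on the single predecessor edge.

/* Illustrative double reduction: the inner sum reaches the outer loop
   through a loop-closed PHI.  */
int
sum_matrix (int a[64][64])
{
  int sum = 0;
  for (int i = 0; i < 64; i++)
    for (int j = 0; j < 64; j++)
      sum += a[i][j];
  return sum;
}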
@@ -10026,7 +9760,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
enum vect_induction_op_type induction_type
= STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
@@ -10051,7 +9785,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
/* TODO: Support multi-lane SLP for nonlinear iv. There should be separate
vector iv update for each iv and a permutation to generate wanted
vector iv. */
- if (slp_node && SLP_TREE_LANES (slp_node) > 1)
+ if (SLP_TREE_LANES (slp_node) > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10262,13 +9996,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
- if (slp_node)
- slp_node->push_vec_def (induction_phi);
- else
- {
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
- *vec_stmt = induction_phi;
- }
+ slp_node->push_vec_def (induction_phi);
/* In case that vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
@@ -10298,10 +10026,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
induction_type);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
new_stmt = SSA_NAME_DEF_STMT (vec_def);
- if (slp_node)
- slp_node->push_vec_def (new_stmt);
- else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ slp_node->push_vec_def (new_stmt);
}
}
@@ -10327,15 +10052,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
stmt_vector_for_cost *cost_vec)
{
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- unsigned ncopies;
bool nested_in_vect_loop = false;
class loop *iv_loop;
tree vec_def;
edge pe = loop_preheader_edge (loop);
basic_block new_bb;
- tree new_vec, vec_init = NULL_TREE, vec_step, t;
+ tree vec_init = NULL_TREE, vec_step, t;
tree new_name;
- gimple *new_stmt;
gphi *induction_phi;
tree induc_def, vec_dest;
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -10362,15 +10085,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return vectorizable_nonlinear_induction (loop_vinfo, stmt_info,
vec_stmt, slp_node, cost_vec);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- if (slp_node)
- ncopies = 1;
- else
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
- gcc_assert (ncopies >= 1);
-
/* FORNOW. These restrictions should be relaxed. */
if (nested_in_vect_loop_p (loop, stmt_info))
{
@@ -10380,14 +10097,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
edge latch_e;
tree loop_arg;
- if (ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types in nested loop.\n");
- return false;
- }
-
exit_phi = NULL;
latch_e = loop_latch_edge (loop->inner);
loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
@@ -10424,7 +10133,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
iv_loop = loop;
gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
- if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1))
+ if (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1)
{
/* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
@@ -10480,41 +10189,28 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!vec_stmt) /* transformation not required. */
{
unsigned inside_cost = 0, prologue_cost = 0;
- if (slp_node)
- {
- /* We eventually need to set a vector type on invariant
- arguments. */
- unsigned j;
- slp_tree child;
- FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
- if (!vect_maybe_update_slp_op_vectype
- (child, SLP_TREE_VECTYPE (slp_node)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "incompatible vector types for "
- "invariants\n");
- return false;
- }
- /* loop cost for vec_loop. */
- inside_cost
- = record_stmt_cost (cost_vec,
- SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
- vector_stmt, stmt_info, 0, vect_body);
- /* prologue cost for vec_init (if not nested) and step. */
- prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
- scalar_to_vec,
- stmt_info, 0, vect_prologue);
- }
- else /* if (!slp_node) */
- {
- /* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
- /* prologue cost for vec_init and vec_step. */
- prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
- }
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec,
+ SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, 0, vect_body);
+ /* prologue cost for vec_init (if not nested) and step. */
+ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+ scalar_to_vec,
+ stmt_info, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vect_model_induction_cost: inside_cost = %d, "
@@ -10545,670 +10241,374 @@ vectorizable_induction (loop_vec_info loop_vinfo,
with group size 3 we need
[i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
[i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
- if (slp_node)
+ gimple_stmt_iterator incr_si;
+ bool insert_after;
+ standard_iv_increment_position (iv_loop, &incr_si, &insert_after);
+
+ /* The initial values are vectorized, but any lanes > group_size
+ need adjustment. */
+ slp_tree init_node
+ = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
+
+ /* Gather steps. Since we do not vectorize inductions as
+ cycles we have to reconstruct the step from SCEV data. */
+ unsigned group_size = SLP_TREE_LANES (slp_node);
+ tree *steps = XALLOCAVEC (tree, group_size);
+ tree *inits = XALLOCAVEC (tree, group_size);
+ stmt_vec_info phi_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
+ {
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ if (!init_node)
+ inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+ pe->dest_idx);
+ }
+
+ /* Now generate the IVs. */
+ unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ gcc_assert (multiple_p (nunits * nvects, group_size));
+ unsigned nivs;
+ unsigned HOST_WIDE_INT const_nunits;
+ if (nested_in_vect_loop)
+ nivs = nvects;
+ else if (nunits.is_constant (&const_nunits))
{
- gimple_stmt_iterator incr_si;
- bool insert_after;
- standard_iv_increment_position (iv_loop, &incr_si, &insert_after);
-
- /* The initial values are vectorized, but any lanes > group_size
- need adjustment. */
- slp_tree init_node
- = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
-
- /* Gather steps. Since we do not vectorize inductions as
- cycles we have to reconstruct the step from SCEV data. */
- unsigned group_size = SLP_TREE_LANES (slp_node);
- tree *steps = XALLOCAVEC (tree, group_size);
- tree *inits = XALLOCAVEC (tree, group_size);
- stmt_vec_info phi_info;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
- {
- steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
- if (!init_node)
- inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
- pe->dest_idx);
- }
-
- /* Now generate the IVs. */
- unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (multiple_p (nunits * nvects, group_size));
- unsigned nivs;
- unsigned HOST_WIDE_INT const_nunits;
- if (nested_in_vect_loop)
- nivs = nvects;
- else if (nunits.is_constant (&const_nunits))
- {
- /* Compute the number of distinct IVs we need. First reduce
- group_size if it is a multiple of const_nunits so we get
- one IV for a group_size of 4 but const_nunits 2. */
- unsigned group_sizep = group_size;
- if (group_sizep % const_nunits == 0)
- group_sizep = group_sizep / const_nunits;
- nivs = least_common_multiple (group_sizep,
- const_nunits) / const_nunits;
- }
- else
- {
- gcc_assert (SLP_TREE_LANES (slp_node) == 1);
- nivs = 1;
- }
- gimple_seq init_stmts = NULL;
- tree lupdate_mul = NULL_TREE;
- if (!nested_in_vect_loop)
+ /* Compute the number of distinct IVs we need. First reduce
+ group_size if it is a multiple of const_nunits so we get
+ one IV for a group_size of 4 but const_nunits 2. */
+ unsigned group_sizep = group_size;
+ if (group_sizep % const_nunits == 0)
+ group_sizep = group_sizep / const_nunits;
+ nivs = least_common_multiple (group_sizep, const_nunits) / const_nunits;
+ }
+ else
+ {
+ gcc_assert (SLP_TREE_LANES (slp_node) == 1);
+ nivs = 1;
+ }
+ gimple_seq init_stmts = NULL;
+ tree lupdate_mul = NULL_TREE;
+ if (!nested_in_vect_loop)
+ {
+ if (nunits.is_constant (&const_nunits))
{
- if (nunits.is_constant (&const_nunits))
- {
- /* The number of iterations covered in one vector iteration. */
- unsigned lup_mul = (nvects * const_nunits) / group_size;
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, lup_mul,
- UNSIGNED)
- : build_int_cstu (stept, lup_mul));
- }
- else
- {
- if (SCALAR_FLOAT_TYPE_P (stept))
- {
- tree tem = build_int_cst (integer_type_node, vf);
- lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
- stept, tem);
- }
- else
- lupdate_mul = build_int_cst (stept, vf);
- lupdate_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- lupdate_mul);
- }
+ /* The number of iterations covered in one vector iteration. */
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
}
- tree peel_mul = NULL_TREE;
- if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ else
{
if (SCALAR_FLOAT_TYPE_P (stept))
- peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
- LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
- else
- peel_mul = gimple_convert (&init_stmts, stept,
- LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
- peel_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype, peel_mul);
-
- /* If early break then we have to create a new PHI which we can use as
- an offset to adjust the induction reduction in early exits.
-
- This is because when peeling for alignment using masking, the first
- few elements of the vector can be inactive. As such if we find the
- entry in the first iteration we have adjust the starting point of
- the scalar code.
-
- We do this by creating a new scalar PHI that keeps track of whether
- we are the first iteration of the loop (with the additional masking)
- or whether we have taken a loop iteration already.
-
- The generated sequence:
-
- pre-header:
- bb1:
- i_1 = <number of leading inactive elements>
-
- header:
- bb2:
- i_2 = PHI <i_1(bb1), 0(latch)>
- …
-
- early-exit:
- bb3:
- i_3 = iv_step * i_2 + PHI<vector-iv>
-
- The first part of the adjustment to create i_1 and i_2 are done here
- and the last part creating i_3 is done in
- vectorizable_live_operations when the induction extraction is
- materialized. */
- if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
{
- auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- tree ty_skip_niters = TREE_TYPE (skip_niters);
- tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
- vect_scalar_var,
- "pfa_iv_offset");
- gphi *nphi = create_phi_node (break_lhs_phi, bb);
- add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
- add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
- loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
-
- LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)
- = PHI_RESULT (nphi);
+ tree tem = build_int_cst (integer_type_node, vf);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
}
+ else
+ lupdate_mul = build_int_cst (stept, vf);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ lupdate_mul);
}
- tree step_mul = NULL_TREE;
- unsigned ivn;
- auto_vec<tree> vec_steps;
- for (ivn = 0; ivn < nivs; ++ivn)
+ }
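
The lup_mul value built above is simply the number of scalar iterations that one vector loop iteration covers; a small self-contained illustration with made-up sizes:

#include <cstdio>

/* Model of lup_mul: nvects vector statements of const_nunits lanes each,
   spread over a group of group_size inductions, advance every scalar IV by
   (nvects * const_nunits) / group_size steps per vector loop iteration.  */
int
main ()
{
  unsigned nvects = 4, const_nunits = 4, group_size = 2;
  unsigned lup_mul = (nvects * const_nunits) / group_size;
  printf ("steps per vector iteration: %u\n", lup_mul);   /* 8 */
  return 0;
}
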
+ tree peel_mul = NULL_TREE;
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ else
+ peel_mul = gimple_convert (&init_stmts, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ peel_mul = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, peel_mul);
+
+ /* If early break then we have to create a new PHI which we can use as
+ an offset to adjust the induction reduction in early exits.
+
+ This is because when peeling for alignment using masking, the first
+ few elements of the vector can be inactive. As such, if we find the
+ entry in the first iteration we have to adjust the starting point of
+ the scalar code.
+
+ We do this by creating a new scalar PHI that keeps track of whether
+ we are in the first iteration of the loop (with the additional masking)
+ or whether we have taken a loop iteration already.
+
+ The generated sequence:
+
+ pre-header:
+ bb1:
+ i_1 = <number of leading inactive elements>
+
+ header:
+ bb2:
+ i_2 = PHI <i_1(bb1), 0(latch)>
+ …
+
+ early-exit:
+ bb3:
+ i_3 = iv_step * i_2 + PHI<vector-iv>
+
+ The first part of the adjustment to create i_1 and i_2 is done here
+ and the last part creating i_3 is done in
+ vectorizable_live_operations when the induction extraction is
+ materialized. */
+ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+ && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo))
+ {
+ auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ tree ty_skip_niters = TREE_TYPE (skip_niters);
+ tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters,
+ vect_scalar_var,
+ "pfa_iv_offset");
+ gphi *nphi = create_phi_node (break_lhs_phi, bb);
+ add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION);
+ add_phi_arg (nphi, build_zero_cst (ty_skip_niters),
+ loop_latch_edge (iv_loop), UNKNOWN_LOCATION);
+
+ LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi);
+ }
+ }
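
As a rough scalar model of the pfa_iv_offset bookkeeping described in the comment above (plain C++, not GCC internals; all names and values are illustrative), the new PHI carries the count of leading inactive lanes only in the first masked iteration and zero afterwards, so an early exit taken in that first iteration can shift the live induction value:

#include <cstdio>

/* Scalar model of pfa_iv_offset: i_2 is the new PHI (skip_niters on the
   first iteration, zero later); an early exit corrects the induction
   result by iv_step * i_2, matching i_3 in the comment above.  */
int
main ()
{
  const int skip_niters = 3;   /* i_1: leading inactive elements  */
  const int iv_step = 2;       /* scalar step of the induction    */
  int vec_iv = 0;              /* stands in for the vector IV PHI */

  for (int iter = 0; iter < 4; ++iter)
    {
      int i_2 = (iter == 0) ? skip_niters : 0;  /* the new scalar PHI  */
      bool early_exit = (iter == 0);            /* pretend the break hits
                                                   on the first iteration */
      if (early_exit)
        {
          int i_3 = iv_step * i_2 + vec_iv;     /* adjusted live value */
          printf ("adjusted induction start: %d\n", i_3);   /* 6 */
          return 0;
        }
      vec_iv += iv_step * 4;                    /* normal latch update */
    }
  return 0;
}
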
+ tree step_mul = NULL_TREE;
+ unsigned ivn;
+ auto_vec<tree> vec_steps;
+ for (ivn = 0; ivn < nivs; ++ivn)
+ {
+ gimple_seq stmts = NULL;
+ bool invariant = true;
+ if (nunits.is_constant (&const_nunits))
{
- gimple_seq stmts = NULL;
- bool invariant = true;
- if (nunits.is_constant (&const_nunits))
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
+ for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
- tree_vector_builder step_elts (step_vectype, const_nunits, 1);
- tree_vector_builder init_elts (vectype, const_nunits, 1);
- tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
- for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
- {
- /* The scalar steps of the IVs. */
- tree elt = steps[(ivn*const_nunits + eltn) % group_size];
- elt = gimple_convert (&init_stmts,
- TREE_TYPE (step_vectype), elt);
- step_elts.quick_push (elt);
- if (!init_node)
- {
- /* The scalar inits of the IVs if not vectorized. */
- elt = inits[(ivn*const_nunits + eltn) % group_size];
- if (!useless_type_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (elt)))
- elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- TREE_TYPE (vectype), elt);
- init_elts.quick_push (elt);
- }
- /* The number of steps to add to the initial values. */
- unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
- mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept, mul_elt,
- UNSIGNED)
- : build_int_cstu (stept, mul_elt));
- }
- vec_step = gimple_build_vector (&init_stmts, &step_elts);
- step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ /* The scalar steps of the IVs. */
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
+ elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt);
+ step_elts.quick_push (elt);
if (!init_node)
- vec_init = gimple_build_vector (&init_stmts, &init_elts);
- }
- else
- {
- if (init_node)
- ;
- else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
{
- new_name = gimple_convert (&init_stmts, stept, inits[0]);
- /* Build the initial value directly as a VEC_SERIES_EXPR. */
- vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
- step_vectype, new_name, steps[0]);
- if (!useless_type_conversion_p (vectype, step_vectype))
- vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- vectype, vec_init);
+ /* The scalar inits of the IVs if not vectorized. */
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ if (!useless_type_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (elt)))
+ elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ TREE_TYPE (vectype), elt);
+ init_elts.quick_push (elt);
}
- else
- {
- /* Build:
- [base, base, base, ...]
- + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
- gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
- gcc_assert (flag_associative_math);
- gcc_assert (index_vectype != NULL_TREE);
-
- tree index = build_index_vector (index_vectype, 0, 1);
- new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
- inits[0]);
- tree base_vec = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- new_name);
- tree step_vec = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- steps[0]);
- vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
- step_vectype, index);
- vec_init = gimple_build (&init_stmts, MULT_EXPR,
- step_vectype, vec_init, step_vec);
- vec_init = gimple_build (&init_stmts, PLUS_EXPR,
- step_vectype, vec_init, base_vec);
- if (!useless_type_conversion_p (vectype, step_vectype))
- vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
- vectype, vec_init);
- }
- /* iv_loop is nested in the loop to be vectorized. Generate:
- vec_step = [S, S, S, S] */
- t = unshare_expr (steps[0]);
- gcc_assert (CONSTANT_CLASS_P (t)
- || TREE_CODE (t) == SSA_NAME);
- vec_step = gimple_build_vector_from_val (&init_stmts,
- step_vectype, t);
- }
- vec_steps.safe_push (vec_step);
- if (peel_mul)
- {
- if (!step_mul)
- step_mul = peel_mul;
- else
- step_mul = gimple_build (&init_stmts,
- MINUS_EXPR, step_vectype,
- step_mul, peel_mul);
- }
-
- /* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
- "vec_iv_");
- induction_phi = create_phi_node (vec_dest, iv_loop->header);
- induc_def = PHI_RESULT (induction_phi);
-
- /* Create the iv update inside the loop */
- tree up = vec_step;
- if (lupdate_mul)
- {
- if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
- {
- /* When we're using loop_len produced by SELEC_VL, the
- non-final iterations are not always processing VF
- elements. So vectorize induction variable instead of
-
- _21 = vect_vec_iv_.6_22 + { VF, ... };
-
- We should generate:
-
- _35 = .SELECT_VL (ivtmp_33, VF);
- vect_cst__22 = [vec_duplicate_expr] _35;
- _21 = vect_vec_iv_.6_22 + vect_cst__22; */
- vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
- tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
- vectype, 0, 0);
- if (SCALAR_FLOAT_TYPE_P (stept))
- expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
- else
- expr = gimple_convert (&stmts, stept, len);
- lupdate_mul = gimple_build_vector_from_val (&stmts,
- step_vectype,
- expr);
- up = gimple_build (&stmts, MULT_EXPR,
- step_vectype, vec_step, lupdate_mul);
- }
- else
- up = gimple_build (&init_stmts,
- MULT_EXPR, step_vectype,
- vec_step, lupdate_mul);
- }
- vec_def = gimple_convert (&stmts, step_vectype, induc_def);
- vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, up);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
- insert_iv_increment (&incr_si, insert_after, stmts);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
-
- if (init_node)
- vec_init = vect_get_slp_vect_def (init_node, ivn);
- if (!nested_in_vect_loop
- && step_mul
- && !integer_zerop (step_mul))
- {
- gcc_assert (invariant);
- vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
- up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_step, step_mul);
- vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
- vec_def, up);
- vec_init = gimple_convert (&init_stmts, vectype, vec_def);
- }
-
- /* Set the arguments of the phi node: */
- add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
-
- slp_node->push_vec_def (induction_phi);
- }
- if (!nested_in_vect_loop)
- {
- /* Fill up to the number of vectors we need for the whole group. */
- if (nunits.is_constant (&const_nunits))
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
- else
- nivs = 1;
- vec_steps.reserve (nivs-ivn);
- for (; ivn < nivs; ++ivn)
- {
- slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]);
- vec_steps.quick_push (vec_steps[0]);
+ /* The number of steps to add to the initial values. */
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, mul_elt,
+ UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
}
+ vec_step = gimple_build_vector (&init_stmts, &step_elts);
+ step_mul = gimple_build_vector (&init_stmts, &mul_elts);
+ if (!init_node)
+ vec_init = gimple_build_vector (&init_stmts, &init_elts);
}
-
- /* Re-use IVs when we can. We are generating further vector
- stmts by adding VF' * stride to the IVs generated above. */
- if (ivn < nvects)
+ else
{
- if (nunits.is_constant (&const_nunits))
+ if (init_node)
+ ;
+ else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
{
- unsigned vfp = (least_common_multiple (group_size, const_nunits)
- / group_size);
- lupdate_mul
- = build_vector_from_val (step_vectype,
- SCALAR_FLOAT_TYPE_P (stept)
- ? build_real_from_wide (stept,
- vfp, UNSIGNED)
- : build_int_cstu (stept, vfp));
+ new_name = gimple_convert (&init_stmts, stept, inits[0]);
+ /* Build the initial value directly as a VEC_SERIES_EXPR. */
+ vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
+ step_vectype, new_name, steps[0]);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
}
else
{
- if (SCALAR_FLOAT_TYPE_P (stept))
- {
- tree tem = build_int_cst (integer_type_node, nunits);
- lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
- stept, tem);
- }
- else
- lupdate_mul = build_int_cst (stept, nunits);
- lupdate_mul = gimple_build_vector_from_val (&init_stmts,
- step_vectype,
- lupdate_mul);
- }
- for (; ivn < nvects; ++ivn)
- {
- gimple *iv
- = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]);
- tree def = gimple_get_lhs (iv);
- if (ivn < 2*nivs)
- vec_steps[ivn - nivs]
- = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
- vec_steps[ivn - nivs], lupdate_mul);
- gimple_seq stmts = NULL;
- def = gimple_convert (&stmts, step_vectype, def);
- def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
- def, vec_steps[ivn % nivs]);
- def = gimple_convert (&stmts, vectype, def);
- if (gimple_code (iv) == GIMPLE_PHI)
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- else
- {
- gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
- gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
- }
- slp_node->push_vec_def (def);
+ /* Build:
+ [base, base, base, ...]
+ + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
+ gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
+ gcc_assert (flag_associative_math);
+ gcc_assert (index_vectype != NULL_TREE);
+
+ tree index = build_index_vector (index_vectype, 0, 1);
+ new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]),
+ inits[0]);
+ tree base_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ new_name);
+ tree step_vec = gimple_build_vector_from_val (&init_stmts,
+ step_vectype,
+ steps[0]);
+ vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
+ step_vectype, index);
+ vec_init = gimple_build (&init_stmts, MULT_EXPR,
+ step_vectype, vec_init, step_vec);
+ vec_init = gimple_build (&init_stmts, PLUS_EXPR,
+ step_vectype, vec_init, base_vec);
+ if (!useless_type_conversion_p (vectype, step_vectype))
+ vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
+ vectype, vec_init);
}
+ /* iv_loop is nested in the loop to be vectorized. Generate:
+ vec_step = [S, S, S, S] */
+ t = unshare_expr (steps[0]);
+ gcc_assert (CONSTANT_CLASS_P (t)
+ || TREE_CODE (t) == SSA_NAME);
+ vec_step = gimple_build_vector_from_val (&init_stmts,
+ step_vectype, t);
+ }
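
For the constant-width branch above, lane eltn of vector ivn takes the scalar step steps[(ivn*const_nunits + eltn) % group_size] and starts (ivn*const_nunits + eltn) / group_size steps past its scalar initial value; the variable-length branch instead builds base + [0, 1, 2, ...] * step via VEC_SERIES_EXPR or the FLOAT_EXPR sequence. A standalone sketch of the constant-width lane layout, with hypothetical sizes and values:

#include <cstdio>

/* Model of the per-lane layout built above: two scalar IVs (init 10 step 1,
   init 100 step 5) packed into a 4-lane vector give [10, 100, 11, 105].  */
int
main ()
{
  const unsigned group_size = 2, const_nunits = 4, nivs = 1;
  const double inits[group_size] = { 10.0, 100.0 };
  const double steps[group_size] = { 1.0, 5.0 };

  for (unsigned ivn = 0; ivn < nivs; ++ivn)
    for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
      {
        unsigned lane = ivn * const_nunits + eltn;
        unsigned mul_elt = lane / group_size;     /* steps to pre-add */
        double init = inits[lane % group_size]
                      + (double) mul_elt * steps[lane % group_size];
        printf ("iv %u lane %u starts at %g\n", ivn, eltn, init);
      }
  return 0;
}
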
+ vec_steps.safe_push (vec_step);
+ if (peel_mul)
+ {
+ if (!step_mul)
+ step_mul = peel_mul;
+ else
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ step_mul, peel_mul);
}
- new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
- gcc_assert (!new_bb);
-
- return true;
- }
+ /* Create the induction-phi that defines the induction-operand. */
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
+ "vec_iv_");
+ induction_phi = create_phi_node (vec_dest, iv_loop->header);
+ induc_def = PHI_RESULT (induction_phi);
- tree init_expr = vect_phi_initial_value (phi);
-
- gimple_seq stmts = NULL;
- if (!nested_in_vect_loop)
- {
- /* Convert the initial value to the IV update type. */
- tree new_type = TREE_TYPE (step_expr);
- init_expr = gimple_convert (&stmts, new_type, init_expr);
-
- /* If we are using the loop mask to "peel" for alignment then we need
- to adjust the start value here. */
- tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- if (skip_niters != NULL_TREE)
+ /* Create the iv update inside the loop */
+ tree up = vec_step;
+ if (lupdate_mul)
{
- if (FLOAT_TYPE_P (vectype))
- skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
- skip_niters);
- else
- skip_niters = gimple_convert (&stmts, new_type, skip_niters);
- tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
- skip_niters, step_expr);
- init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
- init_expr, skip_step);
- }
- }
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ {
+ /* When we're using loop_len produced by SELECT_VL, the
+ non-final iterations are not always processing VF
+ elements. So vectorize the induction variable instead of
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
- /* Create the vector that holds the initial_value of the induction. */
- if (nested_in_vect_loop)
- {
- /* iv_loop is nested in the loop to be vectorized. init_expr had already
- been created during vectorization of previous stmts. We obtain it
- from the STMT_VINFO_VEC_STMT of the defining stmt. */
- auto_vec<tree> vec_inits;
- vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
- init_expr, &vec_inits);
- vec_init = vec_inits[0];
- /* If the initial value is not of proper type, convert it. */
- if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
- {
- new_stmt
- = gimple_build_assign (vect_get_new_ssa_name (vectype,
- vect_simple_var,
- "vec_iv_"),
- VIEW_CONVERT_EXPR,
- build1 (VIEW_CONVERT_EXPR, vectype,
- vec_init));
- vec_init = gimple_assign_lhs (new_stmt);
- new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
- new_stmt);
- gcc_assert (!new_bb);
- }
- }
- else
- {
- /* iv_loop is the loop to be vectorized. Create:
- vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
- stmts = NULL;
- new_name = gimple_convert (&stmts, TREE_TYPE (step_expr), init_expr);
+ We should generate:
- unsigned HOST_WIDE_INT const_nunits;
- if (nunits.is_constant (&const_nunits))
- {
- tree_vector_builder elts (step_vectype, const_nunits, 1);
- elts.quick_push (new_name);
- for (i = 1; i < const_nunits; i++)
- {
- /* Create: new_name_i = new_name + step_expr */
- new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
- new_name, step_expr);
- elts.quick_push (new_name);
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
+ vectype, 0, 0);
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
+ else
+ expr = gimple_convert (&stmts, stept, len);
+ lupdate_mul = gimple_build_vector_from_val (&stmts, step_vectype,
+ expr);
+ up = gimple_build (&stmts, MULT_EXPR,
+ step_vectype, vec_step, lupdate_mul);
}
- /* Create a vector from [new_name_0, new_name_1, ...,
- new_name_nunits-1] */
- vec_init = gimple_build_vector (&stmts, &elts);
- }
- else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)))
- /* Build the initial value directly from a VEC_SERIES_EXPR. */
- vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, step_vectype,
- new_name, step_expr);
- else
- {
- /* Build:
- [base, base, base, ...]
- + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
- gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)));
- gcc_assert (flag_associative_math);
- gcc_assert (index_vectype != NULL_TREE);
-
- tree index = build_index_vector (index_vectype, 0, 1);
- tree base_vec = gimple_build_vector_from_val (&stmts, step_vectype,
- new_name);
- tree step_vec = gimple_build_vector_from_val (&stmts, step_vectype,
- step_expr);
- vec_init = gimple_build (&stmts, FLOAT_EXPR, step_vectype, index);
- vec_init = gimple_build (&stmts, MULT_EXPR, step_vectype,
- vec_init, step_vec);
- vec_init = gimple_build (&stmts, PLUS_EXPR, step_vectype,
- vec_init, base_vec);
- }
- vec_init = gimple_convert (&stmts, vectype, vec_init);
+ else
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
+ }
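
The effect of the SELECT_VL path above is that the IV is advanced by step * len, where len is the length chosen for this iteration, rather than by a fixed step * VF. A scalar model (plain C++; select_vl here is a stand-in, not the GCC internal function):

#include <cstdio>
#include <algorithm>

/* Model of the .SELECT_VL style update: non-final iterations may process
   fewer than VF elements, so the IV advances by step * len each time.  */
static int
select_vl (int remaining, int vf)
{
  return std::min (remaining, vf);
}

int
main ()
{
  const int vf = 4, step = 3;
  int iv = 0, remaining = 10;
  while (remaining > 0)
    {
      int len = select_vl (remaining, vf);  /* _35 = .SELECT_VL (...)  */
      iv += step * len;                     /* _21 = iv + step * len   */
      remaining -= len;
    }
  printf ("final iv: %d\n", iv);            /* 30 == 10 iterations * 3 */
  return 0;
}
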
+ vec_def = gimple_convert (&stmts, step_vectype, induc_def);
+ vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up);
+ vec_def = gimple_convert (&stmts, vectype, vec_def);
+ insert_iv_increment (&incr_si, insert_after, stmts);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
- if (stmts)
+ if (init_node)
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
+ if (!nested_in_vect_loop
+ && step_mul
+ && !integer_zerop (step_mul))
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
+ gcc_assert (invariant);
+ vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
+ up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_step, step_mul);
+ vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype,
+ vec_def, up);
+ vec_init = gimple_convert (&init_stmts, vectype, vec_def);
}
- }
-
-
- /* Create the vector that holds the step of the induction. */
- gimple_stmt_iterator *step_iv_si = NULL;
- if (nested_in_vect_loop)
- /* iv_loop is nested in the loop to be vectorized. Generate:
- vec_step = [S, S, S, S] */
- new_name = step_expr;
- else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
- {
- /* When we're using loop_len produced by SELEC_VL, the non-final
- iterations are not always processing VF elements. So vectorize
- induction variable instead of
- _21 = vect_vec_iv_.6_22 + { VF, ... };
+ /* Set the arguments of the phi node: */
+ add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- We should generate:
-
- _35 = .SELECT_VL (ivtmp_33, VF);
- vect_cst__22 = [vec_duplicate_expr] _35;
- _21 = vect_vec_iv_.6_22 + vect_cst__22; */
- gcc_assert (!slp_node);
- gimple_seq seq = NULL;
- vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
- tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
- expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
- unshare_expr (len)),
- &seq, true, NULL_TREE);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr,
- step_expr);
- gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
- step_iv_si = &si;
+ slp_node->push_vec_def (induction_phi);
}
- else
+ if (!nested_in_vect_loop)
{
- /* iv_loop is the loop to be vectorized. Generate:
- vec_step = [VF*S, VF*S, VF*S, VF*S] */
- gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vf);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
- }
+ /* Fill up to the number of vectors we need for the whole group. */
+ if (nunits.is_constant (&const_nunits))
+ nivs = least_common_multiple (group_size, const_nunits) / const_nunits;
else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (seq)
+ nivs = 1;
+ vec_steps.reserve (nivs-ivn);
+ for (; ivn < nivs; ++ivn)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]);
+ vec_steps.quick_push (vec_steps[0]);
}
}
- t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name)
- || TREE_CODE (new_name) == SSA_NAME);
- new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, step_iv_si);
-
-
- /* Create the following def-use cycle:
- loop prolog:
- vec_init = ...
- vec_step = ...
- loop:
- vec_iv = PHI <vec_init, vec_loop>
- ...
- STMT
- ...
- vec_loop = vec_iv + vec_step; */
-
- /* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
- induction_phi = create_phi_node (vec_dest, iv_loop->header);
- induc_def = PHI_RESULT (induction_phi);
-
- /* Create the iv update inside the loop */
- stmts = NULL;
- vec_def = gimple_convert (&stmts, step_vectype, induc_def);
- vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, vec_step);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- new_stmt = SSA_NAME_DEF_STMT (vec_def);
-
- /* Set the arguments of the phi node: */
- add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
-
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
- *vec_stmt = induction_phi;
-
- /* In case that vectorization factor (VF) is bigger than the number
- of elements that we can fit in a vectype (nunits), we have to generate
- more than one vector stmt - i.e - we need to "unroll" the
- vector stmt by a factor VF/nunits. For more details see documentation
- in vectorizable_operation. */
-
- if (ncopies > 1)
+ /* Re-use IVs when we can. We are generating further vector
+ stmts by adding VF' * stride to the IVs generated above. */
+ if (ivn < nvects)
{
- gimple_seq seq = NULL;
- /* FORNOW. This restriction should be relaxed. */
- gcc_assert (!nested_in_vect_loop);
- /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */
- gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-
- /* Create the vector that holds the step of the induction. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ if (nunits.is_constant (&const_nunits))
{
- expr = build_int_cst (integer_type_node, nunits);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ unsigned vfp = (least_common_multiple (group_size, const_nunits)
+ / group_size);
+ lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
}
else
- expr = build_int_cst (TREE_TYPE (step_expr), nunits);
- new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
- }
-
- t = unshare_expr (new_name);
- gcc_assert (CONSTANT_CLASS_P (new_name)
- || TREE_CODE (new_name) == SSA_NAME);
- new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
- vec_def = induc_def;
- for (i = 1; i < ncopies + 1; i++)
- {
- /* vec_i = vec_prev + vec_step */
- gimple_seq stmts = NULL;
- vec_def = gimple_convert (&stmts, step_vectype, vec_def);
- vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, vec_step);
- vec_def = gimple_convert (&stmts, vectype, vec_def);
-
- gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
- if (i < ncopies)
+ if (SCALAR_FLOAT_TYPE_P (stept))
{
- new_stmt = SSA_NAME_DEF_STMT (vec_def);
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ tree tem = build_int_cst (integer_type_node, nunits);
+ lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem);
}
else
+ lupdate_mul = build_int_cst (stept, nunits);
+ lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype,
+ lupdate_mul);
+ }
+ for (; ivn < nvects; ++ivn)
+ {
+ gimple *iv
+ = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]);
+ tree def = gimple_get_lhs (iv);
+ if (ivn < 2*nivs)
+ vec_steps[ivn - nivs]
+ = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
+ vec_steps[ivn - nivs], lupdate_mul);
+ gimple_seq stmts = NULL;
+ def = gimple_convert (&stmts, step_vectype, def);
+ def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ def, vec_steps[ivn % nivs]);
+ def = gimple_convert (&stmts, vectype, def);
+ if (gimple_code (iv) == GIMPLE_PHI)
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ else
{
- /* vec_1 = vec_iv + (VF/n * S)
- vec_2 = vec_1 + (VF/n * S)
- ...
- vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop
-
- vec_n is used as vec_loop to save the large step register and
- related operations. */
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
+ gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
+ gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING);
}
+ slp_node->push_vec_def (def);
}
}
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "transform induction: created def-use cycle: %G%G",
- (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def));
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts);
+ gcc_assert (!new_bb);
return true;
}
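
The IV re-use loop above derives the remaining nvects - nivs vector defs by adding the (lupdate_mul-scaled) step vector to an already generated def instead of creating new PHIs. A tiny standalone model, with one lane standing in for a whole vector and all values illustrative:

#include <cstdio>

/* Model of the re-use: defs[ivn] = defs[ivn - nivs] + scaled step.  */
int
main ()
{
  const unsigned nivs = 1, nvects = 3;
  const double vfp_step = 4.0;          /* vec_step * lupdate_mul     */
  double defs[nvects];
  defs[0] = 10.0;                       /* def from the induction PHI */
  for (unsigned ivn = nivs; ivn < nvects; ++ivn)
    defs[ivn] = defs[ivn - nivs] + vfp_step;
  for (unsigned i = 0; i < nvects; ++i)
    printf ("vector def %u starts at %g\n", i, defs[i]);   /* 10 14 18 */
  return 0;
}
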
@@ -12203,7 +11603,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
gimple *stmt = STMT_VINFO_STMT (related_vinfo);
stmt_worklist.safe_push (stmt);
/* Set BB such that the assert in
- 'get_initial_def_for_reduction' is able to determine that
+ 'get_initial_defs_for_reduction' is able to determine that
the BB of the related stmt is inside this loop. */
gimple_set_bb (stmt,
gimple_bb (new_stmt));
@@ -12701,6 +12101,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
" variable-length vectorization factor\n");
}
+
+ /* When we have unrolled the loop due to a user-requested value we should
+ leave it up to the RTL unroll heuristics to determine if it's still
+ worthwhile to unroll more. */
+ if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+ loop->unroll = 0;
+
/* Free SLP instances here because otherwise stmt reference counting
won't work. */
slp_instance instance;