about summary refs log tree commit diff
path: root/gcc/tree-vect-loop.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r--  gcc/tree-vect-loop.cc  82
1 files changed, 52 insertions, 30 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6c24202..9320bf8 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5219,6 +5219,15 @@ vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code,
new_temp = gimple_build (seq, code, vectype1, dst1, dst2);
}
+ if (!useless_type_conversion_p (vectype, TREE_TYPE (new_temp)))
+ {
+ tree dst3 = make_ssa_name (vectype);
+ gimple *epilog_stmt = gimple_build_assign (dst3, VIEW_CONVERT_EXPR,
+ build1 (VIEW_CONVERT_EXPR,
+ vectype, new_temp));
+ gimple_seq_add_stmt_without_update (seq, epilog_stmt);
+ new_temp = dst3;
+ }
return new_temp;
}
@@ -5607,13 +5616,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
&& VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
&& vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
{
- gcc_assert (reduc_inputs.length () == 1);
- vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
+ compute_vectype = vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
gimple_seq stmts = NULL;
- reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
- reduc_inputs[0],
- build_one_cst (vectype),
- build_zero_cst (vectype));
+ for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+ reduc_inputs[i] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+ reduc_inputs[i],
+ build_one_cst (vectype),
+ build_zero_cst (vectype));
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
}
@@ -5954,25 +5963,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
}
}
- if (!slp_reduc
- && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
+ else if (!slp_reduc
+ && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
- tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
- stype, nunits1);
+ tree vectype1 = vectype;
+ if (mode1 != mode)
+ {
+ vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+ stype, nunits1);
+ /* First reduce the vector to the desired vector size we should
+ do shift reduction on by combining upper and lower halves. */
+ gimple_seq stmts = NULL;
+ new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
+ code, &stmts);
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+ reduc_inputs[0] = new_temp;
+ }
+
reduce_with_shift = have_whole_vector_shift (mode1);
if (!VECTOR_MODE_P (mode1)
|| !directly_supported_p (code, vectype1))
reduce_with_shift = false;
- /* First reduce the vector to the desired vector size we should
- do shift reduction on by combining upper and lower halves. */
- gimple_seq stmts = NULL;
- new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
- code, &stmts);
- gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
- reduc_inputs[0] = new_temp;
-
if (reduce_with_shift && (!slp_reduc || group_size == 1))
{
tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
@@ -6000,7 +6013,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
"Reduce using vector shifts\n");
gimple_seq stmts = NULL;
- new_temp = gimple_convert (&stmts, vectype1, new_temp);
+ new_temp = gimple_convert (&stmts, vectype1, reduc_inputs[0]);
for (elt_offset = nelements / 2;
elt_offset >= 1;
elt_offset /= 2)
@@ -6044,13 +6057,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
"Reduce using scalar code.\n");
tree compute_type = TREE_TYPE (vectype1);
- tree bitsize = TYPE_SIZE (compute_type);
- int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
- int element_bitsize = tree_to_uhwi (bitsize);
+ unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
+ unsigned element_bitsize = vector_element_bits (vectype1);
+ tree bitsize = bitsize_int (element_bitsize);
gimple_seq stmts = NULL;
FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
{
- int bit_offset;
+ unsigned bit_offset;
new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
vec_temp, bitsize, bitsize_zero_node);
@@ -6392,27 +6405,30 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
would also allow generalizing this for reduction paths of length > 1
and/or SLP reductions. */
slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index];
- tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0);
+ stmt_vec_info reduc_var_def = SLP_TREE_SCALAR_STMTS (reduc_node)[0];
+ tree reduc_var = gimple_get_lhs (STMT_VINFO_STMT (reduc_var_def));
/* The operands either come from a binary operation or an IFN_COND operation.
The former is a gimple assign with binary rhs and the latter is a
gimple call with four arguments. */
gcc_assert (num_ops == 2 || num_ops == 4);
- int group_size = 1;
- stmt_vec_info scalar_dest_def_info;
auto_vec<tree> vec_oprnds0, vec_opmask;
vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
+ (1 - reduc_index)],
&vec_oprnds0);
- group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
- scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
/* For an IFN_COND_OP we also need the vector mask operand. */
if (is_cond_op)
vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
- gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
- tree scalar_dest = gimple_get_lhs (sdef);
+ /* The transform below relies on preserving the original scalar PHI
+ and its latch def which we replace. So work backwards from there. */
+ tree scalar_dest
+ = gimple_phi_arg_def_from_edge (as_a <gphi *> (STMT_VINFO_STMT
+ (reduc_var_def)),
+ loop_latch_edge (loop));
+ stmt_vec_info scalar_dest_def_info
+ = vect_stmt_to_vectorize (loop_vinfo->lookup_def (scalar_dest));
tree scalar_type = TREE_TYPE (scalar_dest);
int vec_num = vec_oprnds0.length ();
@@ -7565,6 +7581,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
values into the low-numbered elements. */
if ((double_reduc || neutral_op)
&& !nunits_out.is_constant ()
+ && (SLP_TREE_LANES (slp_node) != 1 && !reduc_chain)
+ && !operand_equal_p (neutral_op, vect_phi_initial_value (reduc_def_phi))
&& !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
vectype_out, OPTIMIZE_FOR_SPEED))
{
@@ -8211,6 +8229,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
/* Leave the scalar phi in place. */
return true;
+ if (reduc_info && reduc_info->is_reduc_chain && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vectorizing a reduction chain\n");
+
vec_num = vect_get_num_copies (loop_vinfo, slp_node);
/* Check whether we should use a single PHI node and accumulate