aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2018-01-03 07:15:47 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-03 07:15:47 +0000
commit e54dd6d3a791536543d4769aa09508b89d882f37 (patch)
tree db4448d26a9b2471ac93890a2efe92185d049fe5 /gcc/tree-vect-loop.c
parent 9031b367ac87550552318f6516487c70f3ce9a99 (diff)
download gcc-e54dd6d3a791536543d4769aa09508b89d882f37.zip
gcc-e54dd6d3a791536543d4769aa09508b89d882f37.tar.gz
gcc-e54dd6d3a791536543d4769aa09508b89d882f37.tar.bz2
poly_int: vectorizable_reduction
This patch makes vectorizable_reduction cope with variable-length vectors.
We can handle the simple case of an inner loop reduction for which the
target has native support for the epilogue operation. For now we punt on
other cases, but patches after the main SVE submission allow SLP and
double reductions too.

2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
           Alan Hayward <alan.hayward@arm.com>
           David Sherwood <david.sherwood@arm.com>

gcc/
* tree.h (build_index_vector): Declare.
* tree.c (build_index_vector): New function.
* tree-vect-loop.c (get_initial_defs_for_reduction): Treat the number
  of units as polynomial, forcibly converting it to a constant if
  vectorizable_reduction has already enforced the condition.
  (vect_create_epilog_for_reduction): Likewise. Use build_index_vector
  to create a {1,2,3,...} vector.
  (vectorizable_reduction): Treat the number of units as polynomial.
  Choose vectype_in based on the largest scalar element size rather
  than the smallest number of units. Enforce the restrictions relied
  on above.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256133
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- gcc/tree-vect-loop.c 66
1 files changed, 51 insertions, 15 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 557522c..93e430c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4150,6 +4150,8 @@ get_initial_defs_for_reduction (slp_tree slp_node,
vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
scalar_type = TREE_TYPE (vector_type);
+ /* vectorizable_reduction has already rejected SLP reductions on
+ variable-length vectors. */
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def);
@@ -4537,8 +4539,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
tree indx_before_incr, indx_after_incr;
- int nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
- int k;
+ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
@@ -4554,10 +4555,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
vector size (STEP). */
/* Create a {1,2,3,...} vector. */
- tree_vector_builder vtemp (cr_index_vector_type, 1, 3);
- for (k = 0; k < 3; ++k)
- vtemp.quick_push (build_int_cst (cr_index_scalar_type, k + 1));
- tree series_vect = vtemp.build ();
+ tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
/* Create a vector of the step value. */
tree step = build_int_cst (cr_index_scalar_type, nunits_out);
@@ -4935,8 +4933,11 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
- unsigned HOST_WIDE_INT v_size
- = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
+ /* Enforced by vectorizable_reduction, which ensures we have target
+ support before allowing a conditional reduction on variable-length
+ vectors. */
+ unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
tree idx_val = NULL_TREE, val = NULL_TREE;
for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
{
@@ -5055,6 +5056,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
{
bool reduce_with_shift = have_whole_vector_shift (mode);
int element_bitsize = tree_to_uhwi (bitsize);
+ /* Enforced by vectorizable_reduction, which disallows SLP reductions
+ for variable-length vectors and also requires direct target support
+ for loop reductions. */
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree vec_temp;
@@ -5743,10 +5747,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (k == 1
&& gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
continue;
- tem = get_vectype_for_scalar_type (TREE_TYPE (op));
- if (! vectype_in
- || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in))
- vectype_in = tem;
+ if (!vectype_in
+ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
+ vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
gcc_assert (vectype_in);
@@ -5912,7 +5916,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* To properly compute ncopies we are interested in the widest
input type in case we're looking at a widening accumulation. */
if (!vectype_in
- || TYPE_VECTOR_SUBPARTS (vectype_in) > TYPE_VECTOR_SUBPARTS (tem))
+ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
vectype_in = tem;
}
@@ -6097,6 +6102,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
gcc_assert (ncopies >= 1);
vec_mode = TYPE_MODE (vectype_in);
+ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (code == COND_EXPR)
{
@@ -6278,14 +6284,23 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
int scalar_precision
= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
cr_index_scalar_type = make_unsigned_type (scalar_precision);
- cr_index_vector_type = build_vector_type
- (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
+ cr_index_vector_type = build_vector_type (cr_index_scalar_type,
+ nunits_out);
if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type,
OPTIMIZE_FOR_SPEED))
reduc_fn = IFN_REDUC_MAX;
}
+ if (reduc_fn == IFN_LAST && !nunits_out.is_constant ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "missing target support for reduction on"
+ " variable-length vectors.\n");
+ return false;
+ }
+
if ((double_reduc
|| STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
&& ncopies > 1)
@@ -6297,6 +6312,27 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
}
+ if (double_reduc && !nunits_out.is_constant ())
+ {
+ /* The current double-reduction code creates the initial value
+ element-by-element. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "double reduction not supported for variable-length"
+ " vectors.\n");
+ return false;
+ }
+
+ if (slp_node && !nunits_out.is_constant ())
+ {
+ /* The current SLP code creates the initial value element-by-element. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "SLP reduction not supported for variable-length"
+ " vectors.\n");
+ return false;
+ }
+
/* In case of widenning multiplication by a constant, we update the type
of the constant to be the type of the other operand. We check that the
constant fits the type in the pattern recognition pass. */