about | summary | refs | log | tree | commit | diff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-01-13 17:59:59 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-13 17:59:59 +0000
commit: bb6c2b68d6961dfe98bece34e4418d7287ce7089 (patch)
tree: ecb67de45fc3758acd23d527e406c0441be496d4 /gcc/tree-vect-loop.c
parent: bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676 (diff)
download: gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.zip
download: gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.tar.gz
download: gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.tar.bz2
Add support for conditional reductions using SVE CLASTB
This patch uses SVE CLASTB to optimise conditional reductions. It means
that we no longer need to maintain a separate index vector to record
the most recent valid value, and no longer need to worry about overflow
cases.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
* doc/md.texi (fold_extract_last_@var{m}): Document.
* doc/sourcebuild.texi (vect_fold_extract_last): Likewise.
* optabs.def (fold_extract_last_optab): New optab.
* internal-fn.def (FOLD_EXTRACT_LAST): New internal function.
* internal-fn.c (fold_extract_direct): New macro.
(expand_fold_extract_optab_fn): Likewise.
(direct_fold_extract_optab_supported_p): Likewise.
* tree-vectorizer.h (EXTRACT_LAST_REDUCTION): New vect_reduction_type.
* tree-vect-loop.c (vect_model_reduction_cost): Handle
EXTRACT_LAST_REDUCTION.
(get_initial_def_for_reduction): Do not create an initial vector
for EXTRACT_LAST_REDUCTION reductions.
(vectorizable_reduction): Leave the scalar phi in place for
EXTRACT_LAST_REDUCTIONs. Try using EXTRACT_LAST_REDUCTION
ahead of INTEGER_INDUC_COND_REDUCTION. Do not check for an
epilogue code for EXTRACT_LAST_REDUCTION and defer the
transform phase to vectorizable_condition.
* tree-vect-stmts.c (vect_finish_stmt_generation_1): New function,
split out from...
(vect_finish_stmt_generation): ...here.
(vect_finish_replace_stmt): New function.
(vectorizable_condition): Handle EXTRACT_LAST_REDUCTION.
* config/aarch64/aarch64-sve.md (fold_extract_last_<mode>): New
pattern.
* config/aarch64/aarch64.md (UNSPEC_CLASTB): New unspec.

gcc/testsuite/
* lib/target-supports.exp
(check_effective_target_vect_fold_extract_last): New proc.
* gcc.dg/vect/pr65947-1.c: Update dump messages. Add markup
for fold_extract_last.
* gcc.dg/vect/pr65947-2.c: Likewise.
* gcc.dg/vect/pr65947-3.c: Likewise.
* gcc.dg/vect/pr65947-4.c: Likewise.
* gcc.dg/vect/pr65947-5.c: Likewise.
* gcc.dg/vect/pr65947-6.c: Likewise.
* gcc.dg/vect/pr65947-9.c: Likewise.
* gcc.dg/vect/pr65947-10.c: Likewise.
* gcc.dg/vect/pr65947-12.c: Likewise.
* gcc.dg/vect/pr65947-14.c: Likewise.
* gcc.dg/vect/pr80631-1.c: Likewise.
* gcc.target/aarch64/sve/clastb_1.c: New test.
* gcc.target/aarch64/sve/clastb_1_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_2.c: Likewise.
* gcc.target/aarch64/sve/clastb_2_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_3.c: Likewise.
* gcc.target/aarch64/sve/clastb_3_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_4.c: Likewise.
* gcc.target/aarch64/sve/clastb_4_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_5.c: Likewise.
* gcc.target/aarch64/sve/clastb_5_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_6.c: Likewise.
* gcc.target/aarch64/sve/clastb_6_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_7.c: Likewise.
* gcc.target/aarch64/sve/clastb_7_run.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256633
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- gcc/tree-vect-loop.c 129
1 files changed, 86 insertions, 43 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 59cd1c4..b3779e7 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4036,7 +4036,7 @@ static void
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
int ncopies)
{
- int prologue_cost = 0, epilogue_cost = 0;
+ int prologue_cost = 0, epilogue_cost = 0, inside_cost;
enum tree_code code;
optab optab;
tree vectype;
@@ -4055,13 +4055,11 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
/* Condition reductions generate two reductions in the loop. */
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if (reduction_type == COND_REDUCTION)
ncopies *= 2;
- /* Cost of reduction op inside loop. */
- unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
-
vectype = STMT_VINFO_VECTYPE (stmt_info);
mode = TYPE_MODE (vectype);
orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
@@ -4071,14 +4069,30 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
code = gimple_assign_rhs_code (orig_stmt);
- /* Add in cost for initial definition.
- For cond reduction we have four vectors: initial index, step, initial
- result of the data reduction, initial value of the index reduction. */
- int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == COND_REDUCTION ? 4 : 1;
- prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
- vect_prologue);
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ /* No extra instructions needed in the prologue. */
+ prologue_cost = 0;
+
+ /* Count NCOPIES FOLD_EXTRACT_LAST operations. */
+ inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar,
+ stmt_info, 0, vect_body);
+ }
+ else
+ {
+ /* Add in cost for initial definition.
+ For cond reduction we have four vectors: initial index, step,
+ initial result of the data reduction, initial value of the index
+ reduction. */
+ int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
+ prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
+ scalar_to_vec, stmt_info, 0,
+ vect_prologue);
+
+ /* Cost of reduction op inside loop. */
+ inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ }
/* Determine cost of epilogue code.
@@ -4089,7 +4103,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
{
if (reduc_fn != IFN_LAST)
{
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ if (reduction_type == COND_REDUCTION)
{
/* An EQ stmt and an COND_EXPR stmt. */
epilogue_cost += add_stmt_cost (target_cost_data, 2,
@@ -4114,7 +4128,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
vect_epilogue);
}
}
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ else if (reduction_type == COND_REDUCTION)
{
unsigned estimated_nunits = vect_nunits_for_cost (vectype);
/* Extraction of scalar elements. */
@@ -4128,6 +4142,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
scalar_stmt, stmt_info, 0,
vect_epilogue);
}
+ else if (reduction_type == EXTRACT_LAST_REDUCTION)
+ /* No extra instructions needed in the epilogue. */
+ ;
else
{
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
@@ -4292,6 +4309,9 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
return vect_create_destination_var (init_val, vectype);
}
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);
+
/* In case of a nested reduction do not use an adjustment def as
that case is not supported by the epilogue generation correctly
if ncopies is not one. */
@@ -4364,7 +4384,8 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
if (adjustment_def)
{
*adjustment_def = NULL_TREE;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
+ if (reduction_type != COND_REDUCTION
+ && reduction_type != EXTRACT_LAST_REDUCTION)
{
init_def = vect_get_vec_def_for_operand (init_val, stmt);
break;
@@ -6179,6 +6200,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (vinfo_for_stmt (reduc_stmt))
+ == EXTRACT_LAST_REDUCTION)
+ /* Leave the scalar phi in place. */
+ return true;
+
gcc_assert (is_gimple_assign (reduc_stmt));
for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
{
@@ -6435,7 +6461,28 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* If we have a condition reduction, see if we can simplify it further. */
if (v_reduc_type == COND_REDUCTION)
{
- if (cond_reduc_dt == vect_induction_def)
+ /* Loop peeling modifies initial value of reduction PHI, which
+ makes the reduction stmt to be transformed different to the
+ original stmt analyzed. We need to record reduction code for
+ CONST_COND_REDUCTION type reduction at analyzing stage, thus
+ it can be used directly at transform stage. */
+ if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
+ || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
+ {
+ /* Also set the reduction type to CONST_COND_REDUCTION. */
+ gcc_assert (cond_reduc_dt == vect_constant_def);
+ STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
+ }
+ else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
+ vectype_in, OPTIMIZE_FOR_SPEED))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "optimizing condition reduction with"
+ " FOLD_EXTRACT_LAST.\n");
+ STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
+ }
+ else if (cond_reduc_dt == vect_induction_def)
{
stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt);
tree base
@@ -6478,19 +6525,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
= INTEGER_INDUC_COND_REDUCTION;
}
}
-
- /* Loop peeling modifies initial value of reduction PHI, which
- makes the reduction stmt to be transformed different to the
- original stmt analyzed. We need to record reduction code for
- CONST_COND_REDUCTION type reduction at analyzing stage, thus
- it can be used directly at transform stage. */
- if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
- || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
- {
- /* Also set the reduction type to CONST_COND_REDUCTION. */
- gcc_assert (cond_reduc_dt == vect_constant_def);
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
- }
else if (cond_reduc_dt == vect_constant_def)
{
enum vect_def_type cond_initial_dt;
@@ -6644,12 +6678,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
(and also the same tree-code) when generating the epilog code and
when generating the code inside the loop. */
- if (orig_stmt)
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if (orig_stmt && reduction_type == TREE_CODE_REDUCTION)
{
/* This is a reduction pattern: get the vectype from the type of the
reduction variable, and get the tree-code from orig_stmt. */
- gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == TREE_CODE_REDUCTION);
orig_code = gimple_assign_rhs_code (orig_stmt);
gcc_assert (vectype_out);
vec_mode = TYPE_MODE (vectype_out);
@@ -6665,13 +6699,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* For simple condition reductions, replace with the actual expression
we want to base our reduction around. */
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
+ if (reduction_type == CONST_COND_REDUCTION)
{
orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
}
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == INTEGER_INDUC_COND_REDUCTION)
+ else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
orig_code = cond_reduc_op_code;
}
@@ -6693,7 +6726,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
reduc_fn = IFN_LAST;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
+ if (reduction_type == TREE_CODE_REDUCTION
+ || reduction_type == INTEGER_INDUC_COND_REDUCTION
+ || reduction_type == CONST_COND_REDUCTION)
{
if (reduction_fn_for_scalar_code (orig_code, &reduc_fn))
{
@@ -6720,7 +6755,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
}
- else
+ else if (reduction_type == COND_REDUCTION)
{
int scalar_precision
= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
@@ -6733,7 +6768,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
reduc_fn = IFN_REDUC_MAX;
}
- if (reduc_fn == IFN_LAST && !nunits_out.is_constant ())
+ if (reduction_type != EXTRACT_LAST_REDUCTION
+ && reduc_fn == IFN_LAST
+ && !nunits_out.is_constant ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6742,8 +6779,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
}
- if ((double_reduc
- || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
+ if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
&& ncopies > 1)
{
if (dump_enabled_p ())
@@ -6833,7 +6869,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ if (reduction_type == COND_REDUCTION)
{
widest_int ni;
@@ -6970,6 +7006,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ gcc_assert (!slp_node);
+ return vectorizable_condition (stmt, gsi, vec_stmt,
+ NULL, reduc_index, NULL);
+ }
+
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);