diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-13 17:59:59 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-13 17:59:59 +0000 |
commit | bb6c2b68d6961dfe98bece34e4418d7287ce7089 (patch) | |
tree | ecb67de45fc3758acd23d527e406c0441be496d4 /gcc/tree-vect-loop.c | |
parent | bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676 (diff) | |
download | gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.zip gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.tar.gz gcc-bb6c2b68d6961dfe98bece34e4418d7287ce7089.tar.bz2 |
Add support for conditional reductions using SVE CLASTB
This patch uses SVE CLASTB to optimise conditional reductions. It means
that we no longer need to maintain a separate index vector to record
the most recent valid value, and no longer need to worry about overflow
cases.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (fold_extract_last_@var{m}): Document.
* doc/sourcebuild.texi (vect_fold_extract_last): Likewise.
* optabs.def (fold_extract_last_optab): New optab.
* internal-fn.def (FOLD_EXTRACT_LAST): New internal function.
* internal-fn.c (fold_extract_direct): New macro.
(expand_fold_extract_optab_fn): Likewise.
(direct_fold_extract_optab_supported_p): Likewise.
* tree-vectorizer.h (EXTRACT_LAST_REDUCTION): New vect_reduction_type.
* tree-vect-loop.c (vect_model_reduction_cost): Handle
EXTRACT_LAST_REDUCTION.
(get_initial_def_for_reduction): Do not create an initial vector
for EXTRACT_LAST_REDUCTION reductions.
(vectorizable_reduction): Leave the scalar phi in place for
EXTRACT_LAST_REDUCTIONs. Try using EXTRACT_LAST_REDUCTION
ahead of INTEGER_INDUC_COND_REDUCTION. Do not check for an
epilogue code for EXTRACT_LAST_REDUCTION and defer the
transform phase to vectorizable_condition.
* tree-vect-stmts.c (vect_finish_stmt_generation_1): New function,
split out from...
(vect_finish_stmt_generation): ...here.
(vect_finish_replace_stmt): New function.
(vectorizable_condition): Handle EXTRACT_LAST_REDUCTION.
* config/aarch64/aarch64-sve.md (fold_extract_last_<mode>): New
pattern.
* config/aarch64/aarch64.md (UNSPEC_CLASTB): New unspec.
gcc/testsuite/
* lib/target-supports.exp
(check_effective_target_vect_fold_extract_last): New proc.
* gcc.dg/vect/pr65947-1.c: Update dump messages. Add markup
for fold_extract_last.
* gcc.dg/vect/pr65947-2.c: Likewise.
* gcc.dg/vect/pr65947-3.c: Likewise.
* gcc.dg/vect/pr65947-4.c: Likewise.
* gcc.dg/vect/pr65947-5.c: Likewise.
* gcc.dg/vect/pr65947-6.c: Likewise.
* gcc.dg/vect/pr65947-9.c: Likewise.
* gcc.dg/vect/pr65947-10.c: Likewise.
* gcc.dg/vect/pr65947-12.c: Likewise.
* gcc.dg/vect/pr65947-14.c: Likewise.
* gcc.dg/vect/pr80631-1.c: Likewise.
* gcc.target/aarch64/sve/clastb_1.c: New test.
* gcc.target/aarch64/sve/clastb_1_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_2.c: Likewise.
* gcc.target/aarch64/sve/clastb_2_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_3.c: Likewise.
* gcc.target/aarch64/sve/clastb_3_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_4.c: Likewise.
* gcc.target/aarch64/sve/clastb_4_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_5.c: Likewise.
* gcc.target/aarch64/sve/clastb_5_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_6.c: Likewise.
* gcc.target/aarch64/sve/clastb_6_run.c: Likewise.
* gcc.target/aarch64/sve/clastb_7.c: Likewise.
* gcc.target/aarch64/sve/clastb_7_run.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256633
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 129 |
1 file changed, 86 insertions(+), 43 deletions(-)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 59cd1c4..b3779e7 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4036,7 +4036,7 @@ static void vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, int ncopies) { - int prologue_cost = 0, epilogue_cost = 0; + int prologue_cost = 0, epilogue_cost = 0, inside_cost; enum tree_code code; optab optab; tree vectype; @@ -4055,13 +4055,11 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info)); /* Condition reductions generate two reductions in the loop. */ - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + vect_reduction_type reduction_type + = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); + if (reduction_type == COND_REDUCTION) ncopies *= 2; - /* Cost of reduction op inside loop. */ - unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, - stmt_info, 0, vect_body); - vectype = STMT_VINFO_VECTYPE (stmt_info); mode = TYPE_MODE (vectype); orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); @@ -4071,14 +4069,30 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, code = gimple_assign_rhs_code (orig_stmt); - /* Add in cost for initial definition. - For cond reduction we have four vectors: initial index, step, initial - result of the data reduction, initial value of the index reduction. */ - int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == COND_REDUCTION ? 4 : 1; - prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts, - scalar_to_vec, stmt_info, 0, - vect_prologue); + if (reduction_type == EXTRACT_LAST_REDUCTION) + { + /* No extra instructions needed in the prologue. */ + prologue_cost = 0; + + /* Count NCOPIES FOLD_EXTRACT_LAST operations. 
*/ + inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar, + stmt_info, 0, vect_body); + } + else + { + /* Add in cost for initial definition. + For cond reduction we have four vectors: initial index, step, + initial result of the data reduction, initial value of the index + reduction. */ + int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1; + prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts, + scalar_to_vec, stmt_info, 0, + vect_prologue); + + /* Cost of reduction op inside loop. */ + inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, + stmt_info, 0, vect_body); + } /* Determine cost of epilogue code. @@ -4089,7 +4103,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, { if (reduc_fn != IFN_LAST) { - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + if (reduction_type == COND_REDUCTION) { /* An EQ stmt and an COND_EXPR stmt. */ epilogue_cost += add_stmt_cost (target_cost_data, 2, @@ -4114,7 +4128,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, vect_epilogue); } } - else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + else if (reduction_type == COND_REDUCTION) { unsigned estimated_nunits = vect_nunits_for_cost (vectype); /* Extraction of scalar elements. */ @@ -4128,6 +4142,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, scalar_stmt, stmt_info, 0, vect_epilogue); } + else if (reduction_type == EXTRACT_LAST_REDUCTION) + /* No extra instructions need in the epilogue. 
*/ + ; else { int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); @@ -4292,6 +4309,9 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, return vect_create_destination_var (init_val, vectype); } + vect_reduction_type reduction_type + = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo); + /* In case of a nested reduction do not use an adjustment def as that case is not supported by the epilogue generation correctly if ncopies is not one. */ @@ -4364,7 +4384,8 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, if (adjustment_def) { *adjustment_def = NULL_TREE; - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION) + if (reduction_type != COND_REDUCTION + && reduction_type != EXTRACT_LAST_REDUCTION) { init_def = vect_get_vec_def_for_operand (init_val, stmt); break; @@ -6179,6 +6200,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt))) reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt)); + if (STMT_VINFO_VEC_REDUCTION_TYPE (vinfo_for_stmt (reduc_stmt)) + == EXTRACT_LAST_REDUCTION) + /* Leave the scalar phi in place. */ + return true; + gcc_assert (is_gimple_assign (reduc_stmt)); for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) { @@ -6435,7 +6461,28 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, /* If we have a condition reduction, see if we can simplify it further. */ if (v_reduc_type == COND_REDUCTION) { - if (cond_reduc_dt == vect_induction_def) + /* Loop peeling modifies initial value of reduction PHI, which + makes the reduction stmt to be transformed different to the + original stmt analyzed. We need to record reduction code for + CONST_COND_REDUCTION type reduction at analyzing stage, thus + it can be used directly at transform stage. 
*/ + if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR + || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR) + { + /* Also set the reduction type to CONST_COND_REDUCTION. */ + gcc_assert (cond_reduc_dt == vect_constant_def); + STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; + } + else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, + vectype_in, OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "optimizing condition reduction with" + " FOLD_EXTRACT_LAST.\n"); + STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION; + } + else if (cond_reduc_dt == vect_induction_def) { stmt_vec_info cond_stmt_vinfo = vinfo_for_stmt (cond_reduc_def_stmt); tree base @@ -6478,19 +6525,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, = INTEGER_INDUC_COND_REDUCTION; } } - - /* Loop peeling modifies initial value of reduction PHI, which - makes the reduction stmt to be transformed different to the - original stmt analyzed. We need to record reduction code for - CONST_COND_REDUCTION type reduction at analyzing stage, thus - it can be used directly at transform stage. */ - if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR - || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR) - { - /* Also set the reduction type to CONST_COND_REDUCTION. */ - gcc_assert (cond_reduc_dt == vect_constant_def); - STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; - } else if (cond_reduc_dt == vect_constant_def) { enum vect_def_type cond_initial_dt; @@ -6644,12 +6678,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, (and also the same tree-code) when generating the epilog code and when generating the code inside the loop. 
*/ - if (orig_stmt) + vect_reduction_type reduction_type + = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); + if (orig_stmt && reduction_type == TREE_CODE_REDUCTION) { /* This is a reduction pattern: get the vectype from the type of the reduction variable, and get the tree-code from orig_stmt. */ - gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == TREE_CODE_REDUCTION); orig_code = gimple_assign_rhs_code (orig_stmt); gcc_assert (vectype_out); vec_mode = TYPE_MODE (vectype_out); @@ -6665,13 +6699,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, /* For simple condition reductions, replace with the actual expression we want to base our reduction around. */ - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION) + if (reduction_type == CONST_COND_REDUCTION) { orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); } - else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) - == INTEGER_INDUC_COND_REDUCTION) + else if (reduction_type == INTEGER_INDUC_COND_REDUCTION) orig_code = cond_reduc_op_code; } @@ -6693,7 +6726,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, reduc_fn = IFN_LAST; - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION) + if (reduction_type == TREE_CODE_REDUCTION + || reduction_type == INTEGER_INDUC_COND_REDUCTION + || reduction_type == CONST_COND_REDUCTION) { if (reduction_fn_for_scalar_code (orig_code, &reduc_fn)) { @@ -6720,7 +6755,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, } } } - else + else if (reduction_type == COND_REDUCTION) { int scalar_precision = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); @@ -6733,7 +6768,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, reduc_fn = IFN_REDUC_MAX; } - if (reduc_fn == IFN_LAST && !nunits_out.is_constant ()) + if (reduction_type != EXTRACT_LAST_REDUCTION + && reduc_fn == IFN_LAST + && !nunits_out.is_constant ()) { if 
(dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6742,8 +6779,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, return false; } - if ((double_reduc - || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION) + if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) && ncopies > 1) { if (dump_enabled_p ()) @@ -6833,7 +6869,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, } } - if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) + if (reduction_type == COND_REDUCTION) { widest_int ni; @@ -6970,6 +7006,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + if (reduction_type == EXTRACT_LAST_REDUCTION) + { + gcc_assert (!slp_node); + return vectorizable_condition (stmt, gsi, vec_stmt, + NULL, reduc_index, NULL); + } + /* Create the destination vector */ vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |