diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-13 17:59:50 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-13 17:59:50 +0000 |
commit | bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676 (patch) | |
tree | 489dd37405fb9ba664020489aaec102ba690e05f /gcc/tree-vect-loop.c | |
parent | 76a34e3f8565e36d164006e62f7380bfe6057154 (diff) | |
download | gcc-bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676.zip gcc-bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676.tar.gz gcc-bfe1bb57ba4dfd78f8c1ac7d46cf27e8e7408676.tar.bz2 |
Add support for vectorising live-out values using SVE LASTB
This patch uses the SVE LASTB instruction to optimise cases in which
a value produced by the final scalar iteration of a vectorised loop is
live outside the loop. Previously this situation would stop us from
using a fully-masked loop.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* doc/md.texi (extract_last_@var{m}): Document.
* optabs.def (extract_last_optab): New optab.
* internal-fn.def (EXTRACT_LAST): New internal function.
* internal-fn.c (cond_unary_direct): New macro.
(expand_cond_unary_optab_fn): Likewise.
(direct_cond_unary_optab_supported_p): Likewise.
* tree-vect-loop.c (vectorizable_live_operation): Allow fully-masked
loops using EXTRACT_LAST.
* config/aarch64/aarch64-sve.md (aarch64_sve_lastb<mode>): Rename to...
(extract_last_<mode>): ...this optab.
(vec_extract<mode><Vel>): Update accordingly.
gcc/testsuite/
* gcc.target/aarch64/sve/live_1.c: New test.
* gcc.target/aarch64/sve/live_1_run.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256632
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 87 |
1 file changed, 70 insertions, 17 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index d7cc12f..59cd1c4 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -7812,16 +7812,43 @@ vectorizable_live_operation (gimple *stmt, if (!vec_stmt) { + /* No transformation required. */ if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because " - "a value is live outside the loop.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, + OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because " + "the target doesn't support extract last " + "reduction.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else if (slp_node) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because an " + "SLP statement is live after the loop.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else if (ncopies > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't use a fully-masked loop because" + " ncopies is greater than 1.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } + else + { + gcc_assert (ncopies == 1 && !slp_node); + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), + 1, vectype); + } } - - /* No transformation required. */ return true; } @@ -7838,12 +7865,12 @@ vectorizable_live_operation (gimple *stmt, : TYPE_SIZE (TREE_TYPE (vectype))); vec_bitsize = TYPE_SIZE (vectype); - gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); - /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ tree vec_lhs, bitstart; if (slp_node) { + gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); + /* Get the correct slp vectorized stmt. 
*/ vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]); @@ -7855,6 +7882,8 @@ vectorizable_live_operation (gimple *stmt, { enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info); vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt); + gcc_checking_assert (ncopies == 1 + || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); /* For multiple copies, get the last copy. */ for (int i = 1; i < ncopies; ++i) @@ -7865,15 +7894,39 @@ vectorizable_live_operation (gimple *stmt, bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize); } - /* Create a new vectorized stmt for the uses of STMT and insert outside the - loop. */ gimple_seq stmts = NULL; - tree bftype = TREE_TYPE (vectype); - if (VECTOR_BOOLEAN_TYPE_P (vectype)) - bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); - tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart); - new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts, - true, NULL_TREE); + tree new_tree; + if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + { + /* Emit: + + SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> + + where VEC_LHS is the vectorized live-out result and MASK is + the loop mask for the final iteration. */ + gcc_assert (ncopies == 1 && !slp_node); + tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); + tree scalar_res = make_ssa_name (scalar_type); + tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo), + 1, vectype, 0); + gcall *new_stmt = gimple_build_call_internal (IFN_EXTRACT_LAST, + 2, mask, vec_lhs); + gimple_call_set_lhs (new_stmt, scalar_res); + gimple_seq_add_stmt (&stmts, new_stmt); + + /* Convert the extracted vector element to the required scalar type. 
*/ + new_tree = gimple_convert (&stmts, lhs_type, scalar_res); + } + else + { + tree bftype = TREE_TYPE (vectype); + if (VECTOR_BOOLEAN_TYPE_P (vectype)) + bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); + new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart); + new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), + &stmts, true, NULL_TREE); + } + if (stmts) gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts); |