diff options
author | Richard Biener <rguenther@suse.de> | 2024-09-05 10:46:58 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2024-09-06 09:16:03 +0200 |
commit | 116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7 (patch) | |
tree | 513187641a4ff198fa0d14328fae2d29119393fb /gcc | |
parent | 1dd175a0ccdd0ff4e7cb6668164a4fe99e47015d (diff) | |
download | gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.zip gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.tar.gz gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.tar.bz2 |
tree-optimization/116609 - SLP live lane vectorization with partial vectors
The following implements the simple case of single-lane SLP when
using partial vectors; this case can use the existing VEC_EXTRACT_LAST
code generation without changes. The PR is kept open for further
enhancements.
This avoids FAILs of gcc.target/aarch64/sve/live_1.c when using
single-lane SLP for non-grouped stores.
PR tree-optimization/116609
* tree-vect-loop.cc (vectorizable_live_operation_1): Support
partial vectors for single-lane SLP.
(vectorizable_live_operation): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 18 |
1 file changed, 11 insertions, 7 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 242d5e2..31cdc4b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10961,7 +10961,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
          where VEC_LHS is the vectorized live-out result and MASK is
          the loop mask for the final iteration.  */
 
-      gcc_assert (ncopies == 1 && !slp_node);
+      gcc_assert (ncopies == 1
+                  && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
@@ -10995,7 +10996,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
          where VEC_LHS is the vectorized live-out result and MASK is
          the loop mask for the final iteration.  */
 
-      gcc_assert (!slp_node);
+      gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
       tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
@@ -11147,7 +11148,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
       /* No transformation required.  */
       if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
         {
-          if (slp_node)
+          if (slp_node && SLP_TREE_LANES (slp_node) != 1)
             {
               if (dump_enabled_p ())
                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11156,7 +11157,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
                                  "the loop.\n");
               LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
             }
-          else if (ncopies > 1)
+          else if (ncopies > 1
+                   || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
             {
               if (dump_enabled_p ())
                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11166,7 +11168,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
             }
           else
             {
-              gcc_assert (ncopies == 1 && !slp_node);
+              gcc_assert (ncopies == 1
+                          && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
               if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
                                                   OPTIMIZE_FOR_SPEED))
                 vect_record_loop_mask (loop_vinfo,
@@ -11213,8 +11216,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
       if (slp_node)
         {
           gcc_assert (!loop_vinfo
-                      || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-                          && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
+                      || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+                           && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+                          || SLP_TREE_LANES (slp_node) == 1));
 
           /* Get the correct slp vectorized stmt.  */
           vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];