aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2024-09-05 10:46:58 +0200
committer Richard Biener <rguenth@gcc.gnu.org> 2024-09-06 09:16:03 +0200
commit 116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7 (patch)
tree 513187641a4ff198fa0d14328fae2d29119393fb
parent 1dd175a0ccdd0ff4e7cb6668164a4fe99e47015d (diff)
download gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.zip
gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.tar.gz
gcc-116bfbc806a7aa3f1ae2a3b3eb38d6bb65e0d0a7.tar.bz2
tree-optimization/116609 - SLP live lane vectorization with partial vectors
The following implements the simple case of single-lane SLP when using partial vectors, which can use the VEC_EXTRACT_LAST code generation without changes. I'll keep the PR open for further enhancements.

This avoids FAILs of gcc.target/aarch64/sve/live_1.c when using single-lane SLP for non-grouped stores.

	PR tree-optimization/116609
	* tree-vect-loop.cc (vectorizable_live_operation_1): Support partial vectors for single-lane SLP.
-rw-r--r-- gcc/tree-vect-loop.cc 18
1 file changed, 11 insertions, 7 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 242d5e2..31cdc4b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10961,7 +10961,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (ncopies == 1 && !slp_node);
+ gcc_assert (ncopies == 1
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
tree len = vect_get_loop_len (loop_vinfo, &gsi,
@@ -10995,7 +10996,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (!slp_node);
+ gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
@@ -11147,7 +11148,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
/* No transformation required. */
if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (slp_node)
+ if (slp_node && SLP_TREE_LANES (slp_node) != 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11156,7 +11157,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
"the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else if (ncopies > 1)
+ else if (ncopies > 1
+ || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11166,7 +11168,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- gcc_assert (ncopies == 1 && !slp_node);
+ gcc_assert (ncopies == 1
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
vect_record_loop_mask (loop_vinfo,
@@ -11213,8 +11216,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
if (slp_node)
{
gcc_assert (!loop_vinfo
- || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
+ || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ || SLP_TREE_LANES (slp_node) == 1));
/* Get the correct slp vectorized stmt. */
vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];