aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2024-10-10 14:00:11 +0200
committerRichard Biener <rguenth@gcc.gnu.org>2024-10-11 07:59:27 +0200
commit36b9c5e6f3301d3d0165f578d020dcd350cd516d (patch)
treec4993f6c7fb3af02a9d1640c6e048c05eebb78f2
parent9f2f108a8a68c7b7b2de5350439a8ab8e17a54da (diff)
downloadgcc-36b9c5e6f3301d3d0165f578d020dcd350cd516d.zip
gcc-36b9c5e6f3301d3d0165f578d020dcd350cd516d.tar.gz
gcc-36b9c5e6f3301d3d0165f578d020dcd350cd516d.tar.bz2
Fix possible wrong-code with masked store-lanes
When we're doing masked store-lanes one mask element applies to all loads of one struct element. This requires uniform masks for all of the SLP lanes, something we already compute into STMT_VINFO_SLP_VECT_ONLY but fail to check when doing SLP store-lanes. The following corrects this. The following also adjusts the store-lane heuristic to properly check for masked or non-masked optab support. * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Allow passing in of vectype, pass in whether the stores are masked and query the correct optab. (vect_build_slp_instance): Guard store-lanes query with ! STMT_VINFO_SLP_VECT_ONLY, guaranteeing a uniform mask.
-rw-r--r--gcc/tree-vect-slp.cc30
1 file changed, 20 insertions, 10 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 96f1992..3024b87 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3504,17 +3504,22 @@ vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
}
/* STMT_INFO is a store group of size GROUP_SIZE that we are considering
- splitting into two, with the first split group having size NEW_GROUP_SIZE.
+ vectorizing with VECTYPE that might be NULL. MASKED_P indicates whether
+ the stores are masked.
Return true if we could use IFN_STORE_LANES instead and if that appears
to be the better approach. */
static bool
vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree vectype, bool masked_p,
unsigned int group_size,
unsigned int new_group_size)
{
- tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ if (!vectype)
+ {
+ tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ }
if (!vectype)
return false;
/* Allow the split if one of the two new groups would operate on full
@@ -3528,7 +3533,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
|| multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
return false;
- return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
+ return vect_store_lanes_supported (vectype, group_size, masked_p) != IFN_LAST;
}
/* Analyze an SLP instance starting from a group of grouped stores. Call
@@ -3973,6 +3978,10 @@ vect_build_slp_instance (vec_info *vinfo,
else if (is_a <loop_vec_info> (vinfo)
&& (group_size != 1 && i < group_size))
{
+ gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
+ bool masked_p = call
+ && gimple_call_internal_p (call)
+ && internal_fn_mask_index (gimple_call_internal_fn (call)) != -1;
/* There are targets that cannot do even/odd interleaving schemes
so they absolutely need to use load/store-lanes. For now
force single-lane SLP for them - they would be happy with
@@ -3987,9 +3996,10 @@ vect_build_slp_instance (vec_info *vinfo,
bool want_store_lanes
= (! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
&& ! STMT_VINFO_STRIDED_P (stmt_info)
+ && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
&& compare_step_with_zero (vinfo, stmt_info) > 0
- && vect_slp_prefer_store_lanes_p (vinfo, stmt_info,
- group_size, 1));
+ && vect_slp_prefer_store_lanes_p (vinfo, stmt_info, NULL_TREE,
+ masked_p, group_size, 1));
if (want_store_lanes || force_single_lane)
i = 1;
@@ -4074,14 +4084,14 @@ vect_build_slp_instance (vec_info *vinfo,
/* Now re-assess whether we want store lanes in case the
discovery ended up producing all single-lane RHSs. */
- if (rhs_common_nlanes == 1
+ if (! want_store_lanes
+ && rhs_common_nlanes == 1
&& ! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
&& ! STMT_VINFO_STRIDED_P (stmt_info)
+ && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
&& compare_step_with_zero (vinfo, stmt_info) > 0
&& (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]),
- group_size,
- SLP_TREE_CHILDREN
- (rhs_nodes[0]).length () != 1)
+ group_size, masked_p)
!= IFN_LAST))
want_store_lanes = true;