author    Richard Biener <rguenther@suse.de>  2024-09-23 15:24:01 +0200
committer Richard Biener <rguenth@gcc.gnu.org>  2024-09-27 09:20:24 +0200
commit    b1c7095a1da11d2543222d98243d10f9cc9823ce (patch)
tree      08efd7d0227807613d047b8243c6f6bd9664a6e4
parent    3db9e99165968af8479468cd373990da2f116e3b (diff)
tree-optimization/116818 - try VMAT_GATHER_SCATTER also for SLP
When not doing SLP and we end up with VMAT_ELEMENTWISE we consider
using strided loads, aka VMAT_GATHER_SCATTER.  The following moves
this logic down to also apply to SLP, where we can now end up using
VMAT_ELEMENTWISE as well.

	PR tree-optimization/116818
	* tree-vect-stmts.cc (get_group_load_store_type): Consider
	VMAT_GATHER_SCATTER instead of VMAT_ELEMENTWISE also for SLP.
	(vectorizable_load): For single-lane VMAT_GATHER_SCATTER also
	ignore permutations.
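As an illustration (not part of the commit or the PR testcase; the
function below is hypothetical), roughly the kind of single-element
strided access this fallback targets looks like:

    /* Hypothetical example: each iteration loads one element at a
       constant stride.  Such an access can end up classified as
       VMAT_ELEMENTWISE; on targets with strided or gather loads the
       vectorizer may instead use VMAT_GATHER_SCATTER.  */
    void
    scale (double *restrict dst, const double *restrict src,
	   int n, int stride)
    {
      for (int i = 0; i < n; ++i)
	dst[i] = src[i * stride] * 2.0;
    }

With this change the same fallback is also considered when such a loop
is vectorized as a single-lane SLP instance.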
-rw-r--r--  gcc/tree-vect-stmts.cc  | 29
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a8031b4..0e75e3b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2260,21 +2260,21 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 		}
 	    }
 	}
+    }
 
-      /* As a last resort, trying using a gather load or scatter store.
+  /* As a last resort, trying using a gather load or scatter store.
 
-	 ??? Although the code can handle all group sizes correctly,
-	 it probably isn't a win to use separate strided accesses based
-	 on nearby locations.  Or, even if it's a win over scalar code,
-	 it might not be a win over vectorizing at a lower VF, if that
-	 allows us to use contiguous accesses.  */
-      if (*memory_access_type == VMAT_ELEMENTWISE
-	  && single_element_p
-	  && loop_vinfo
-	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
-						 masked_p, gs_info))
-	*memory_access_type = VMAT_GATHER_SCATTER;
-    }
+     ??? Although the code can handle all group sizes correctly,
+     it probably isn't a win to use separate strided accesses based
+     on nearby locations.  Or, even if it's a win over scalar code,
+     it might not be a win over vectorizing at a lower VF, if that
+     allows us to use contiguous accesses.  */
+  if (*memory_access_type == VMAT_ELEMENTWISE
+      && single_element_p
+      && loop_vinfo
+      && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
+					     masked_p, gs_info))
+    *memory_access_type = VMAT_GATHER_SCATTER;
 
   if (*memory_access_type == VMAT_GATHER_SCATTER
       || *memory_access_type == VMAT_ELEMENTWISE)
@@ -10063,7 +10063,8 @@ vectorizable_load (vec_info *vinfo,
      get_group_load_store_type.  */
   if (slp
       && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
-      && !(memory_access_type == VMAT_ELEMENTWISE
+      && !((memory_access_type == VMAT_ELEMENTWISE
+	    || memory_access_type == VMAT_GATHER_SCATTER)
 	   && SLP_TREE_LANES (slp_node) == 1))
     {
       slp_perm = true;