about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2024-11-12 10:31:30 +0100
committerRichard Biener <rguenth@gcc.gnu.org>2024-11-12 14:57:32 +0100
commit0d4b254b20a9193ab261d02f8a063e21816f85e4 (patch)
tree1b88def31c54824e092b9a3468294399c7959c56 /gcc
parent1b35b929354c41f3e2682aa7a30013e1bfe31bd4 (diff)
downloadgcc-0d4b254b20a9193ab261d02f8a063e21816f85e4.zip
gcc-0d4b254b20a9193ab261d02f8a063e21816f85e4.tar.gz
gcc-0d4b254b20a9193ab261d02f8a063e21816f85e4.tar.bz2
tree-optimization/116973 - SLP permute lower heuristic and single-lane SLP
When forcing single-lane SLP to emulate non-SLP behavior we need to disable heuristics designed to optimize SLP loads and instead in all cases resort to an interleaving scheme as requested by forcefully doing single-lane SLP. This fixes the remaining fallout for --param vect-force-slp=1 on x86.

	PR tree-optimization/116973
	* tree-vect-slp.cc (vect_lower_load_permutations): Add
	force_single_lane parameter.  Disable heuristic that keeps
	some load-permutations.
	(vect_analyze_slp): Pass force_single_lane to
	vect_lower_load_permutations.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/tree-vect-slp.cc | 17
1 file changed, 11 insertions, 6 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index eebac19..d69fdc0 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4402,7 +4402,8 @@ vllp_cmp (const void *a_, const void *b_)
static void
vect_lower_load_permutations (loop_vec_info loop_vinfo,
scalar_stmts_to_slp_tree_map_t *bst_map,
- const array_slice<slp_tree> &loads)
+ const array_slice<slp_tree> &loads,
+ bool force_single_lane)
{
/* We at this point want to lower without a fixed VF or vector
size in mind which means we cannot actually compute whether we
@@ -4494,7 +4495,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
extracting it from the larger load.
??? Long-term some of the lowering should move to where
the vector types involved are fixed. */
- if (ld_lanes_lanes == 0
+ if (!force_single_lane
+ && ld_lanes_lanes == 0
&& contiguous
&& (SLP_TREE_LANES (load) > 1 || loads.size () == 1)
&& pow2p_hwi (SLP_TREE_LANES (load))
@@ -4668,7 +4670,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
static void
vect_lower_load_permutations (loop_vec_info loop_vinfo,
- scalar_stmts_to_slp_tree_map_t *bst_map)
+ scalar_stmts_to_slp_tree_map_t *bst_map,
+ bool force_single_lane)
{
/* Gather and sort loads across all instances. */
hash_set<slp_tree> visited;
@@ -4696,14 +4699,16 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
if (STMT_VINFO_GROUPED_ACCESS (a0))
vect_lower_load_permutations (loop_vinfo, bst_map,
make_array_slice (&loads[firsti],
- i - firsti));
+ i - firsti),
+ force_single_lane);
firsti = i;
}
if (firsti < loads.length ()
&& STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (loads[firsti])[0]))
vect_lower_load_permutations (loop_vinfo, bst_map,
make_array_slice (&loads[firsti],
- loads.length () - firsti));
+ loads.length () - firsti),
+ force_single_lane);
}
/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
@@ -5097,7 +5102,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
like schemes. */
if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
{
- vect_lower_load_permutations (loop_vinfo, bst_map);
+ vect_lower_load_permutations (loop_vinfo, bst_map, force_single_lane);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,