aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2025-09-12 13:20:46 +0200
committer Richard Biener <rguenther@suse.de> 2025-09-14 11:05:42 +0200
commit 80f9440b5ecb1c3ab943b5c862f84e06fbeabd89 (patch)
tree 404f6480b0062e219276e38d129d0e5704a80e91 /gcc
parent 1744dc23b311907d04edf312a3dbafa2e4f6e6a2 (diff)
download gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.zip
gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.tar.gz
gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.tar.bz2
Do less redundant vect_transform_slp_perm_load calls
The following tries to call vect_transform_slp_perm_load exactly once during analysis and once during transform. A second call still remains during analysis, in get_load_store_type. As a temporary measure, this records n_perms in the load-store info and verifies it against the value computed at transform stage.

	* tree-vectorizer.h (vect_load_store_data::n_perms): New.
	* tree-vect-stmts.cc (vectorizable_load): Analyze SLP_TREE_LOAD_PERMUTATION only once and remember n_perms. Verify the transform-time n_perms against the value stored during analysis.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/tree-vect-stmts.cc 47
-rw-r--r-- gcc/tree-vectorizer.h 1
2 files changed, 27 insertions, 21 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ba78f60..f78acaf 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9478,6 +9478,7 @@ vectorizable_load (vec_info *vinfo,
/* ??? The following checks should really be part of
get_load_store_type. */
+ unsigned n_perms = -1U;
if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
&& !((memory_access_type == VMAT_ELEMENTWISE
|| mat_gather_scatter_p (memory_access_type))
@@ -9485,7 +9486,7 @@ vectorizable_load (vec_info *vinfo,
{
slp_perm = true;
- if (!loop_vinfo)
+ if (!loop_vinfo && cost_vec)
{
/* In BB vectorization we may not actually use a loaded vector
accessing elements in excess of DR_GROUP_SIZE. */
@@ -9508,17 +9509,21 @@ vectorizable_load (vec_info *vinfo,
}
}
- auto_vec<tree> tem;
- unsigned n_perms;
- if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
- true, &n_perms))
+ if (cost_vec)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "unsupported load permutation\n");
- return false;
+ if (!vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
+ true, &n_perms))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "unsupported load permutation\n");
+ return false;
+ }
+ ls.n_perms = n_perms;
}
+ else
+ n_perms = ls.n_perms;
}
if (slp_node->ldst_lanes
@@ -9989,18 +9994,19 @@ vectorizable_load (vec_info *vinfo,
}
if (slp_perm)
{
- unsigned n_perms;
if (costing_p)
{
- unsigned n_loads;
- vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
- true, &n_perms, &n_loads);
+ gcc_assert (n_perms != -1U);
inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
slp_node, 0, vect_body);
}
else
- vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
- false, &n_perms);
+ {
+ unsigned n_perms2;
+ vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+ false, &n_perms2);
+ gcc_assert (n_perms == n_perms2);
+ }
}
if (costing_p)
@@ -11378,25 +11384,24 @@ vectorizable_load (vec_info *vinfo,
if (slp_perm)
{
- unsigned n_perms;
/* For SLP we know we've seen all possible uses of dr_chain so
direct vect_transform_slp_perm_load to DCE the unused parts.
??? This is a hack to prevent compile-time issues as seen
in PR101120 and friends. */
if (costing_p)
{
- vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
- true, &n_perms, nullptr);
+ gcc_assert (n_perms != -1U);
if (n_perms != 0)
inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
slp_node, 0, vect_body);
}
else
{
+ unsigned n_perms2;
bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
- gsi, vf, false, &n_perms,
+ gsi, vf, false, &n_perms2,
nullptr, true);
- gcc_assert (ok);
+ gcc_assert (ok && n_perms == n_perms2);
}
dr_chain.release ();
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6872f8a..34a3a57 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -288,6 +288,7 @@ struct vect_load_store_data : vect_data {
} gs;
tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
auto_vec<int> elsvals;
+ unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
};
/* A computation tree of an SLP instance. Each node corresponds to a group of