aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2024-11-25 13:32:15 +0100
committer Richard Biener <rguenth@gcc.gnu.org> 2024-11-25 14:24:30 +0100
commit cd8db107b9bef73fd822ffb420f96ed2bc622a19 (patch)
tree a951d973e88f067189e6c460b419d58ed15baf2f
parent d9c908b75039653f2b7717b4b7cdffdc4f0fcc7d (diff)
download gcc-cd8db107b9bef73fd822ffb420f96ed2bc622a19.zip
gcc-cd8db107b9bef73fd822ffb420f96ed2bc622a19.tar.gz
gcc-cd8db107b9bef73fd822ffb420f96ed2bc622a19.tar.bz2
target/116760 - 416.gamess slowdown with SLP

For the TWOTFF loop vectorization the backend scales constructor
and vector extract cost to make higher VFs less profitable.  This
heuristic currently fails to consider VMAT_STRIDED_SLP which we now
get with single-lane SLP, causing a huge regression in SPEC 2k6
416.gamess for the respective loop nest.

The following fixes this, matching behavior to that of GCC 14 by
treating single-lane VMAT_STRIDED_SLP the same as VMAT_ELEMENTWISE.

	PR target/116760
	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Scale
	vec_construct for single-lane VMAT_STRIDED_SLP the same as
	VMAT_ELEMENTWISE.
	* tree-vect-stmts.cc (vectorizable_store): Pass SLP node down to
	costing for vec_to_scalar for VMAT_STRIDED_SLP.

-rw-r--r--  gcc/config/i386/i386.cc  8
-rw-r--r--  gcc/tree-vect-stmts.cc   3
2 files changed, 7 insertions, 4 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 8ab9120..fda2112 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25340,9 +25340,11 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
|| (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
== VMAT_GATHER_SCATTER)))
|| (node
- && ((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
- (SLP_TREE_REPRESENTATIVE (node))))
+ && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
+ && SLP_TREE_LANES (node) == 1))
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+ (SLP_TREE_REPRESENTATIVE (node))))
!= INTEGER_CST))
|| (SLP_TREE_MEMORY_ACCESS_TYPE (node)
== VMAT_GATHER_SCATTER)))))
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 82cd389..c2d5818 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8832,7 +8832,8 @@ vectorizable_store (vec_info *vinfo,
if (nstores > 1)
inside_cost
+= record_stmt_cost (cost_vec, 1, vec_to_scalar,
- stmt_info, 0, vect_body);
+ stmt_info, slp_node,
+ 0, vect_body);
/* Take a single lane vector type store as scalar
store to avoid ICE like 110776. */
if (VECTOR_TYPE_P (ltype)