diff options
author | Richard Biener <rguenther@suse.de> | 2025-09-12 13:20:46 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2025-09-14 11:05:42 +0200 |
commit | 80f9440b5ecb1c3ab943b5c862f84e06fbeabd89 (patch) | |
tree | 404f6480b0062e219276e38d129d0e5704a80e91 | |
parent | 1744dc23b311907d04edf312a3dbafa2e4f6e6a2 (diff) | |
download | gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.zip gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.tar.gz gcc-80f9440b5ecb1c3ab943b5c862f84e06fbeabd89.tar.bz2 |
Do less redundant vect_transform_slp_perm_load calls
The following tries to call vect_transform_slp_perm_load exactly
once during analysis and once during transform. A second case
still remains during analysis, in get_load_store_type. As a temporary
measure, this records n_perms in the load-store info and verifies it
against the value computed at transform stage.
* tree-vectorizer.h (vect_load_store_data::n_perms): New.
* tree-vect-stmts.cc (vectorizable_load): Analyze
SLP_TREE_LOAD_PERMUTATION only once and remember n_perms.
Verify the transform-time n_perms against the value stored
during analysis.
-rw-r--r-- | gcc/tree-vect-stmts.cc | 47 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 1 |
2 files changed, 27 insertions, 21 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ba78f60..f78acaf 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9478,6 +9478,7 @@ vectorizable_load (vec_info *vinfo,
 
   /* ??? The following checks should really be part of
      get_load_store_type.  */
+  unsigned n_perms = -1U;
   if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
       && !((memory_access_type == VMAT_ELEMENTWISE
             || mat_gather_scatter_p (memory_access_type))
@@ -9485,7 +9486,7 @@ vectorizable_load (vec_info *vinfo,
     {
       slp_perm = true;
 
-      if (!loop_vinfo)
+      if (!loop_vinfo && cost_vec)
         {
           /* In BB vectorization we may not actually use a loaded vector
              accessing elements in excess of DR_GROUP_SIZE.  */
@@ -9508,17 +9509,21 @@ vectorizable_load (vec_info *vinfo,
             }
         }
 
-      auto_vec<tree> tem;
-      unsigned n_perms;
-      if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
-                                         true, &n_perms))
+      if (cost_vec)
         {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-                             vect_location,
-                             "unsupported load permutation\n");
-          return false;
+          if (!vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
+                                             true, &n_perms))
+            {
+              if (dump_enabled_p ())
+                dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                 vect_location,
+                                 "unsupported load permutation\n");
+              return false;
+            }
+          ls.n_perms = n_perms;
         }
+      else
+        n_perms = ls.n_perms;
     }
 
   if (slp_node->ldst_lanes
@@ -9989,18 +9994,19 @@ vectorizable_load (vec_info *vinfo,
             }
           if (slp_perm)
             {
-              unsigned n_perms;
               if (costing_p)
                 {
-                  unsigned n_loads;
-                  vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
-                                                true, &n_perms, &n_loads);
+                  gcc_assert (n_perms != -1U);
                   inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                                    slp_node, 0, vect_body);
                 }
               else
-                vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
-                                              false, &n_perms);
+                {
+                  unsigned n_perms2;
+                  vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
+                                                false, &n_perms2);
+                  gcc_assert (n_perms == n_perms2);
+                }
             }
 
           if (costing_p)
@@ -11378,25 +11384,24 @@ vectorizable_load (vec_info *vinfo,
 
       if (slp_perm)
         {
-          unsigned n_perms;
           /* For SLP we know we've seen all possible uses of dr_chain so
              direct vect_transform_slp_perm_load to DCE the unused parts.
              ??? This is a hack to prevent compile-time issues as seen
              in PR101120 and friends.  */
           if (costing_p)
             {
-              vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
-                                            true, &n_perms, nullptr);
+              gcc_assert (n_perms != -1U);
               if (n_perms != 0)
                 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
                                                 slp_node, 0, vect_body);
             }
           else
             {
+              unsigned n_perms2;
               bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
-                                                      gsi, vf, false, &n_perms,
+                                                      gsi, vf, false, &n_perms2,
                                                       nullptr, true);
-              gcc_assert (ok);
+              gcc_assert (ok && n_perms == n_perms2);
             }
           dr_chain.release ();
         }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6872f8a..34a3a57 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -288,6 +288,7 @@ struct vect_load_store_data : vect_data {
   } gs;
   tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
   auto_vec<int> elsvals;
+  unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
 };
 
 /* A computation tree of an SLP instance.  Each node corresponds to a group of