aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-slp.cc
diff options
context:
space:
mode:
author: Kewen Lin <linkw@linux.ibm.com> 2023-05-24 00:05:01 -0500
committer: Kewen Lin <linkw@linux.ibm.com> 2023-05-24 00:05:01 -0500
commit: e55c134ebeef2fa23ad5f4d8afa36b5949b2852c (patch)
tree: d76abadd94081f90e744dfb55a4128c96ade009c /gcc/tree-vect-slp.cc
parent: e0600a02fc3eda109d12bdfccf1408c5bf2994db (diff)
download: gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.zip
gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.tar.gz
gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.tar.bz2
vect: Enhance cost evaluation in vect_transform_slp_perm_load_1
Following Richi's suggestion in [1], I'm working on deferring cost evaluation so that it sits next to the transformation. This patch enhances the function vect_transform_slp_perm_load_1, which could under-cost vector permutations: the costing doesn't consider nvectors_per_build, so it is inconsistent with the transformation part.

Basically, it changes the code below

  if (index == count)
    {
      if (!noop_p)
        {
          // A
          ...
          // ++*n_perms;

          if (!analyze_only)
            {
              // B1
              ...
              // B2
              ...
              for ...
                // B3 building VEC_PERM_EXPR
            }
        }
      else if (!analyze_only)
        {
          // no B2 since there are no further uses here.
          for ...
            // B4 building nothing
        }
      // B5
      ...
    }

to:

  if (index == count)
    {
      if (!noop_p)
        {
          // A
          ...

          if (!analyze_only)
            // B1
            ...

          // B2
          ... (trivial computations during analyze_only or not)

          for ...
            {
              // now n_perms is consistent with building VEC_PERM_EXPR
              // ++*n_perms;
              if (analyze_only)
                continue;
              // B3 building VEC_PERM_EXPR
            }
        }
      else if (!analyze_only)
        {
          // no B2 since there are no further uses here.
          for ...
            // B4 building nothing
        }
      // B5
      ...
    }

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html

gcc/ChangeLog:

	* tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the calculation on n_perms by considering nvectors_per_build.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- gcc/tree-vect-slp.cc | 66
1 files changed, 34 insertions, 32 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index a6f277c..ab89a82 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8124,12 +8124,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
mode = TYPE_MODE (vectype);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
/* Initialize the vect stmts of NODE to properly insert the generated
stmts later. */
if (! analyze_only)
- for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
- i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
+ for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++)
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
/* Generate permutation masks for every NODE. Number of masks for each NODE
@@ -8170,7 +8170,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
(b) the permutes only need a single vector input. */
mask.new_vector (nunits, group_size, 3);
nelts_to_build = mask.encoded_nelts ();
- nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
+ /* It's possible to obtain zero nstmts during analyze_only, so make
+ it at least one to ensure the later computation for n_perms
+ proceed. */
+ nvectors_per_build = nstmts > 0 ? nstmts : 1;
in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
}
else
@@ -8261,40 +8264,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
return false;
}
- ++*n_perms;
-
+ tree mask_vec = NULL_TREE;
if (!analyze_only)
- {
- tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
+ mask_vec = vect_gen_perm_mask_checked (vectype, indices);
- if (second_vec_index == -1)
- second_vec_index = first_vec_index;
+ if (second_vec_index == -1)
+ second_vec_index = first_vec_index;
- for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+ for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+ {
+ ++*n_perms;
+ if (analyze_only)
+ continue;
+ /* Generate the permute statement if necessary. */
+ tree first_vec = dr_chain[first_vec_index + ri];
+ tree second_vec = dr_chain[second_vec_index + ri];
+ gassign *stmt = as_a<gassign *> (stmt_info->stmt);
+ tree perm_dest
+ = vect_create_destination_var (gimple_assign_lhs (stmt),
+ vectype);
+ perm_dest = make_ssa_name (perm_dest);
+ gimple *perm_stmt
+ = gimple_build_assign (perm_dest, VEC_PERM_EXPR, first_vec,
+ second_vec, mask_vec);
+ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+ gsi);
+ if (dce_chain)
{
- /* Generate the permute statement if necessary. */
- tree first_vec = dr_chain[first_vec_index + ri];
- tree second_vec = dr_chain[second_vec_index + ri];
- gassign *stmt = as_a<gassign *> (stmt_info->stmt);
- tree perm_dest
- = vect_create_destination_var (gimple_assign_lhs (stmt),
- vectype);
- perm_dest = make_ssa_name (perm_dest);
- gimple *perm_stmt
- = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
- first_vec, second_vec, mask_vec);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
- gsi);
- if (dce_chain)
- {
- bitmap_set_bit (used_defs, first_vec_index + ri);
- bitmap_set_bit (used_defs, second_vec_index + ri);
- }
-
- /* Store the vector statement in NODE. */
- SLP_TREE_VEC_STMTS (node) [vect_stmts_counter++]
- = perm_stmt;
+ bitmap_set_bit (used_defs, first_vec_index + ri);
+ bitmap_set_bit (used_defs, second_vec_index + ri);
}
+
+ /* Store the vector statement in NODE. */
+ SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
}
}
else if (!analyze_only)