diff options
author | Kewen Lin <linkw@linux.ibm.com> | 2023-05-24 00:05:01 -0500 |
---|---|---|
committer | Kewen Lin <linkw@linux.ibm.com> | 2023-05-24 00:05:01 -0500 |
commit | e55c134ebeef2fa23ad5f4d8afa36b5949b2852c (patch) | |
tree | d76abadd94081f90e744dfb55a4128c96ade009c /gcc/tree-vect-slp.cc | |
parent | e0600a02fc3eda109d12bdfccf1408c5bf2994db (diff) | |
download | gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.zip gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.tar.gz gcc-e55c134ebeef2fa23ad5f4d8afa36b5949b2852c.tar.bz2 |
vect: Enhance cost evaluation in vect_transform_slp_perm_load_1
Following Richi's suggestion in [1], I'm working on deferring
cost evaluation next to the transformation, this patch is
to enhance function vect_transform_slp_perm_load_1 which
could under-cost for vector permutation, since the costing
doesn't try to consider nvectors_per_build, it's inconsistent
with the transformation part.
Basically it changes the below
if (index == count)
{
if (!noop_p)
{
// A ...
// ++*n_perms;
if (!analyze_only)
{
// B1 ...
// B2 ...
for ...
// B3 building VEC_PERM_EXPR
}
}
else if (!analyze_only)
{
// no B2 since no any further uses here.
for ...
// B4 building nothing
}
// B5 ...
}
to:
if (index == count)
{
if (!noop_p)
{
// A ...
if (!analyze_only)
// B1 ...
// B2 ... (trivial computations during analyze_only or not)
for ...
{
// now n_perms is consistent with building VEC_PERM_EXPR
// ++*n_perms;
if (analyze_only)
continue;
// B3 building VEC_PERM_EXPR
}
}
else if (!analyze_only)
{
// no B2 since no any further uses here.
for ...
// B4 building nothing
}
// B5 ...
}
[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html
gcc/ChangeLog:
* tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the
calculation on n_perms by considering nvectors_per_build.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- | gcc/tree-vect-slp.cc | 66 |
1 files changed, 34 insertions, 32 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index a6f277c..ab89a82 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8124,12 +8124,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node, mode = TYPE_MODE (vectype); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); /* Initialize the vect stmts of NODE to properly insert the generated stmts later. */ if (! analyze_only) - for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); - i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++) + for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++) SLP_TREE_VEC_STMTS (node).quick_push (NULL); /* Generate permutation masks for every NODE. Number of masks for each NODE @@ -8170,7 +8170,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node, (b) the permutes only need a single vector input. */ mask.new_vector (nunits, group_size, 3); nelts_to_build = mask.encoded_nelts (); - nvectors_per_build = SLP_TREE_VEC_STMTS (node).length (); + /* It's possible to obtain zero nstmts during analyze_only, so make + it at least one to ensure the later computation for n_perms + proceed. */ + nvectors_per_build = nstmts > 0 ? nstmts : 1; in_nlanes = DR_GROUP_SIZE (stmt_info) * 3; } else @@ -8261,40 +8264,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node, return false; } - ++*n_perms; - + tree mask_vec = NULL_TREE; if (!analyze_only) - { - tree mask_vec = vect_gen_perm_mask_checked (vectype, indices); + mask_vec = vect_gen_perm_mask_checked (vectype, indices); - if (second_vec_index == -1) - second_vec_index = first_vec_index; + if (second_vec_index == -1) + second_vec_index = first_vec_index; - for (unsigned int ri = 0; ri < nvectors_per_build; ++ri) + for (unsigned int ri = 0; ri < nvectors_per_build; ++ri) + { + ++*n_perms; + if (analyze_only) + continue; + /* Generate the permute statement if necessary. */ + tree first_vec = dr_chain[first_vec_index + ri]; + tree second_vec = dr_chain[second_vec_index + ri]; + gassign *stmt = as_a<gassign *> (stmt_info->stmt); + tree perm_dest + = vect_create_destination_var (gimple_assign_lhs (stmt), + vectype); + perm_dest = make_ssa_name (perm_dest); + gimple *perm_stmt + = gimple_build_assign (perm_dest, VEC_PERM_EXPR, first_vec, + second_vec, mask_vec); + vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, + gsi); + if (dce_chain) { - /* Generate the permute statement if necessary. */ - tree first_vec = dr_chain[first_vec_index + ri]; - tree second_vec = dr_chain[second_vec_index + ri]; - gassign *stmt = as_a<gassign *> (stmt_info->stmt); - tree perm_dest - = vect_create_destination_var (gimple_assign_lhs (stmt), - vectype); - perm_dest = make_ssa_name (perm_dest); - gimple *perm_stmt - = gimple_build_assign (perm_dest, VEC_PERM_EXPR, - first_vec, second_vec, mask_vec); - vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, - gsi); - if (dce_chain) - { - bitmap_set_bit (used_defs, first_vec_index + ri); - bitmap_set_bit (used_defs, second_vec_index + ri); - } - - /* Store the vector statement in NODE. */ - SLP_TREE_VEC_STMTS (node) [vect_stmts_counter++] - = perm_stmt; + bitmap_set_bit (used_defs, first_vec_index + ri); + bitmap_set_bit (used_defs, second_vec_index + ri); } + + /* Store the vector statement in NODE. */ + SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt; } } else if (!analyze_only) |