aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-slp.cc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2023-08-15 13:05:32 +0200
committer Richard Biener <rguenther@suse.de> 2023-08-15 14:08:35 +0200
commit f049868d8db773da4be61d82025e97453d6e9f2b (patch)
tree 184b74d4a8b187ad4211abcda12b63cdeb0bfbde /gcc/tree-vect-slp.cc
parent 8a6097c7425c4b6294e11dea062df7a883343f47 (diff)
download:
gcc-f049868d8db773da4be61d82025e97453d6e9f2b.zip
gcc-f049868d8db773da4be61d82025e97453d6e9f2b.tar.gz
gcc-f049868d8db773da4be61d82025e97453d6e9f2b.tar.bz2
Support constants and externals in BB reduction vectorization

The following supports vectorizing BB reductions involving a
constant or an invariant.

	* tree-vectorizer.h (_slp_instance::remain_stmts): Change to ...
	(_slp_instance::remain_defs): ... this.
	(SLP_INSTANCE_REMAIN_STMTS): Rename to ...
	(SLP_INSTANCE_REMAIN_DEFS): ... this.
	(slp_root::remain): New.
	(slp_root::slp_root): Adjust.
	* tree-vect-slp.cc (vect_free_slp_instance): Adjust.
	(vect_build_slp_instance): Get extra remain parameter,
	adjust former handling of a cut off stmt.
	(vect_analyze_slp_instance): Adjust.
	(vect_analyze_slp): Likewise.
	(_bb_vec_info::~_bb_vec_info): Likewise.
	(vectorizable_bb_reduc_epilogue): Dump something if we fail.
	(vect_slp_check_for_constructors): Handle non-internal
	defs as remain defs of a reduction.
	(vectorize_slp_instance_root_stmt): Adjust.

	* gcc.dg/vect/bb-slp-75.c: New testcase.

Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- gcc/tree-vect-slp.cc 60
1 files changed, 41 insertions, 19 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 41997d5..cf91b21 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -209,7 +209,7 @@ vect_free_slp_instance (slp_instance instance)
vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
SLP_INSTANCE_LOADS (instance).release ();
SLP_INSTANCE_ROOT_STMTS (instance).release ();
- SLP_INSTANCE_REMAIN_STMTS (instance).release ();
+ SLP_INSTANCE_REMAIN_DEFS (instance).release ();
instance->subgraph_entries.release ();
instance->cost_vec.release ();
free (instance);
@@ -3115,6 +3115,7 @@ vect_build_slp_instance (vec_info *vinfo,
slp_instance_kind kind,
vec<stmt_vec_info> &scalar_stmts,
vec<stmt_vec_info> &root_stmt_infos,
+ vec<tree> &remain,
unsigned max_tree_size, unsigned *limit,
scalar_stmts_to_slp_tree_map_t *bst_map,
/* ??? We need stmt_info for group splitting. */
@@ -3134,10 +3135,9 @@ vect_build_slp_instance (vec_info *vinfo,
??? Selecting the optimal set of lanes to vectorize would be nice
but SLP build for all lanes will fail quickly because we think
we're going to need unrolling. */
- auto_vec<stmt_vec_info> remain;
if (kind == slp_inst_kind_bb_reduc
&& (scalar_stmts.length () & 1))
- remain.safe_push (scalar_stmts.pop ());
+ remain.safe_insert (0, gimple_get_lhs (scalar_stmts.pop ()->stmt));
/* Build the tree for the SLP instance. */
unsigned int group_size = scalar_stmts.length ();
@@ -3186,10 +3186,7 @@ vect_build_slp_instance (vec_info *vinfo,
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_LOADS (new_instance) = vNULL;
SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
- if (!remain.is_empty ())
- SLP_INSTANCE_REMAIN_STMTS (new_instance) = remain.copy ();
- else
- SLP_INSTANCE_REMAIN_STMTS (new_instance) = vNULL;
+ SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
SLP_INSTANCE_KIND (new_instance) = kind;
new_instance->reduc_phis = NULL;
new_instance->cost_vec = vNULL;
@@ -3469,6 +3466,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
gcc_unreachable ();
vec<stmt_vec_info> roots = vNULL;
+ vec<tree> remain = vNULL;
if (kind == slp_inst_kind_ctor)
{
roots.create (1);
@@ -3476,7 +3474,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
}
/* Build the tree for the SLP instance. */
bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts,
- roots,
+ roots, remain,
max_tree_size, limit, bst_map,
kind == slp_inst_kind_store
? stmt_info : NULL);
@@ -3521,10 +3519,12 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
if (vect_build_slp_instance (bb_vinfo, bb_vinfo->roots[i].kind,
bb_vinfo->roots[i].stmts,
bb_vinfo->roots[i].roots,
+ bb_vinfo->roots[i].remain,
max_tree_size, &limit, bst_map, NULL))
{
bb_vinfo->roots[i].stmts = vNULL;
bb_vinfo->roots[i].roots = vNULL;
+ bb_vinfo->roots[i].remain = vNULL;
}
}
}
@@ -5955,6 +5955,7 @@ _bb_vec_info::~_bb_vec_info ()
{
roots[i].stmts.release ();
roots[i].roots.release ();
+ roots[i].remain.release ();
}
roots.release ();
}
@@ -6405,7 +6406,13 @@ vectorizable_bb_reduc_epilogue (slp_instance instance,
|| !direct_internal_fn_supported_p (reduc_fn, vectype, OPTIMIZE_FOR_BOTH)
|| !useless_type_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
TREE_TYPE (vectype)))
- return false;
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: basic block reduction epilogue "
+ "operation unsupported.\n");
+ return false;
+ }
/* There's no way to cost a horizontal vector reduction via REDUC_FN so
cost log2 vector operations plus shuffles and one extraction. */
@@ -7262,22 +7269,37 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
but record those to be handled in the epilogue. */
/* ??? For now do not allow mixing ops or externs/constants. */
bool invalid = false;
+ unsigned remain_cnt = 0;
for (unsigned i = 0; i < chain.length (); ++i)
- if (chain[i].dt != vect_internal_def
- || chain[i].code != code)
- invalid = true;
- if (!invalid)
+ {
+ if (chain[i].code != code)
+ {
+ invalid = true;
+ break;
+ }
+ if (chain[i].dt != vect_internal_def)
+ remain_cnt++;
+ }
+ if (!invalid && chain.length () - remain_cnt > 1)
{
vec<stmt_vec_info> stmts;
+ vec<tree> remain = vNULL;
stmts.create (chain.length ());
+ if (remain_cnt > 0)
+ remain.create (remain_cnt);
for (unsigned i = 0; i < chain.length (); ++i)
- stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+ {
+ if (chain[i].dt == vect_internal_def)
+ stmts.quick_push (bb_vinfo->lookup_def (chain[i].op));
+ else
+ remain.quick_push (chain[i].op);
+ }
vec<stmt_vec_info> roots;
roots.create (chain_stmts.length ());
for (unsigned i = 0; i < chain_stmts.length (); ++i)
roots.quick_push (bb_vinfo->lookup_stmt (chain_stmts[i]));
bb_vinfo->roots.safe_push (slp_root (slp_inst_kind_bb_reduc,
- stmts, roots));
+ stmts, roots, remain));
}
}
}
@@ -9160,16 +9182,16 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
gcc_unreachable ();
tree scalar_def = gimple_build (&epilogue, as_combined_fn (reduc_fn),
TREE_TYPE (TREE_TYPE (vec_def)), vec_def);
- if (!SLP_INSTANCE_REMAIN_STMTS (instance).is_empty ())
+ if (!SLP_INSTANCE_REMAIN_DEFS (instance).is_empty ())
{
tree rem_def = NULL_TREE;
- for (auto rem : SLP_INSTANCE_REMAIN_STMTS (instance))
+ for (auto def : SLP_INSTANCE_REMAIN_DEFS (instance))
if (!rem_def)
- rem_def = gimple_get_lhs (rem->stmt);
+ rem_def = def;
else
rem_def = gimple_build (&epilogue, reduc_code,
TREE_TYPE (scalar_def),
- rem_def, gimple_get_lhs (rem->stmt));
+ rem_def, def);
scalar_def = gimple_build (&epilogue, reduc_code,
TREE_TYPE (scalar_def),
scalar_def, rem_def);