diff options
author | Richard Biener <rguenther@suse.de> | 2017-11-29 14:38:06 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2017-11-29 14:38:06 +0000 |
commit | f7300fff74becf365cdadd23c9447521da852e84 (patch) | |
tree | db9b2a10fc6481be1fda9bfc85276cb2c9e9f376 /gcc | |
parent | d5ed6a87ed359a8706bdfcc6853dacc881249fec (diff) | |
download | gcc-f7300fff74becf365cdadd23c9447521da852e84.zip gcc-f7300fff74becf365cdadd23c9447521da852e84.tar.gz gcc-f7300fff74becf365cdadd23c9447521da852e84.tar.bz2 |
re PR tree-optimization/83202 (Try joining operations on consecutive array elements during tree vectorization)
2017-11-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/83202
* tree-vect-slp.c (scalar_stmts_set_t): New typedef.
(bst_fail): Use it.
(vect_analyze_slp_cost_1): Add visited set, do not account SLP
nodes vectorized to the same stmts multiple times.
(vect_analyze_slp_cost): Allocate a visited set and pass it down.
(vect_analyze_slp_instance): Adjust.
(scalar_stmts_to_slp_tree_map_t): New typedef.
(vect_schedule_slp_instance): Add a map recording the SLP node
representing the vectorized stmts for a set of scalar stmts.
Avoid code-generating redundancies.
(vect_schedule_slp): Allocate map and pass it down.
* gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.
From-SVN: r255233
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c | 15 | ||||
-rw-r--r-- | gcc/tree-vect-slp.c | 46 |
4 files changed, 72 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2b93243..003b6b1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2017-11-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/83202 + * tree-vect-slp.c (scalar_stmts_set_t): New typedef. + (bst_fail): Use it. + (vect_analyze_slp_cost_1): Add visited set, do not account SLP + nodes vectorized to the same stmts multiple times. + (vect_analyze_slp_cost): Allocate a visited set and pass it down. + (vect_analyze_slp_instance): Adjust. + (scalar_stmts_to_slp_tree_map_t): New typedef. + (vect_schedule_slp_instance): Add a map recording the SLP node + representing the vectorized stmts for a set of scalar stmts. + Avoid code-generating redundancies. + (vect_schedule_slp): Allocate map and pass it down. + 2017-11-29 Nathan Sidwell <nathan@acm.org> PR c++/83187 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3b90be1..bf1e374 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-11-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/83202 + * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase. + 2017-11-29 Nathan Sidwell <nathan@acm.org> PR c++/83187 diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c new file mode 100644 index 0000000..bdec20f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ + +void test(double data[4][2]) +{ + for (int i = 0; i < 4; i++) + { + data[i][0] = data[i][0] * data[i][0]; + data[i][1] = data[i][1] * data[i][1]; + } +} + +/* We should vectorize this with SLP and V2DF vectors. */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index bc81b3d..19f2ac4 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate) return true; } -static hash_set <vec <gimple *>, bst_traits> *bst_fail; +typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t; +static scalar_stmts_set_t *bst_fail; static slp_tree vect_build_slp_tree_2 (vec_info *vinfo, @@ -1674,7 +1675,8 @@ static void vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *body_cost_vec, - unsigned ncopies_for_cost) + unsigned ncopies_for_cost, + scalar_stmts_set_t* visited) { unsigned i, j; slp_tree child; @@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, stmt_vec_info stmt_info; tree lhs; + /* If we already costed the exact same set of scalar stmts we're done. + We share the generated vector stmts for those. */ + if (visited->contains (SLP_TREE_SCALAR_STMTS (node))) + return; + + visited->add (SLP_TREE_SCALAR_STMTS (node).copy ()); + /* Recurse down the SLP tree. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, - body_cost_vec, ncopies_for_cost); + body_cost_vec, ncopies_for_cost, visited); /* Look at the first scalar stmt to determine the cost. */ stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data) prologue_cost_vec.create (10); body_cost_vec.create (10); + scalar_stmts_set_t *visited = new scalar_stmts_set_t (); vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), &prologue_cost_vec, &body_cost_vec, - ncopies_for_cost); + ncopies_for_cost, visited); + delete visited; /* Record the prologue costs, which were delayed until we were sure that SLP was successful. */ @@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo, /* Build the tree for the SLP instance. */ bool *matches = XALLOCAVEC (bool, group_size); unsigned npermutes = 0; - bst_fail = new hash_set <vec <gimple *>, bst_traits> (); + bst_fail = new scalar_stmts_set_t (); node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, &max_nunits, &loads, matches, &npermutes, NULL, max_tree_size); @@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, return true; } - +typedef hash_map <vec <gimple *>, slp_tree, + simple_hashmap_traits <bst_traits, slp_tree> > + scalar_stmts_to_slp_tree_map_t; /* Vectorize SLP instance tree in postorder. */ static bool -vect_schedule_slp_instance (slp_tree node, slp_instance instance) +vect_schedule_slp_instance (slp_tree node, slp_instance instance, + scalar_stmts_to_slp_tree_map_t *bst_map) { gimple *stmt; bool grouped_store, is_store; @@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance) if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) return false; + /* See if we have already vectorized the same set of stmts and reuse their + vectorized stmts. */ + slp_tree &leader + = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ()); + if (leader) + { + SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader)); + return false; + } + + leader = node; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - vect_schedule_slp_instance (child, instance); + vect_schedule_slp_instance (child, instance, bst_map); /* Push SLP node def-type to stmts. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) @@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo) FOR_EACH_VEC_ELT (slp_instances, i, instance) { /* Schedule the tree of INSTANCE. */ + scalar_stmts_to_slp_tree_map_t *bst_map + = new scalar_stmts_to_slp_tree_map_t (); is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), - instance); + instance, bst_map); + delete bst_map; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectorizing stmts using SLP.\n"); |