aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2017-11-29 14:38:06 +0000
committerRichard Biener <rguenth@gcc.gnu.org>2017-11-29 14:38:06 +0000
commitf7300fff74becf365cdadd23c9447521da852e84 (patch)
treedb9b2a10fc6481be1fda9bfc85276cb2c9e9f376 /gcc
parentd5ed6a87ed359a8706bdfcc6853dacc881249fec (diff)
downloadgcc-f7300fff74becf365cdadd23c9447521da852e84.zip
gcc-f7300fff74becf365cdadd23c9447521da852e84.tar.gz
gcc-f7300fff74becf365cdadd23c9447521da852e84.tar.bz2
re PR tree-optimization/83202 (Try joining operations on consecutive array elements during tree vectorization)
2017-11-29 Richard Biener <rguenther@suse.de> PR tree-optimization/83202 * tree-vect-slp.c (scalar_stmts_set_t): New typedef. (bst_fail): Use it. (vect_analyze_slp_cost_1): Add visited set, do not account SLP nodes vectorized to the same stmts multiple times. (vect_analyze_slp_cost): Allocate a visited set and pass it down. (vect_analyze_slp_instance): Adjust. (scalar_stmts_to_slp_tree_map_t): New typedef. (vect_schedule_slp_instance): Add a map recording the SLP node representing the vectorized stmts for a set of scalar stmts. Avoid code-generating redundancies. (vect_schedule_slp): Allocate map and pass it down. * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase. From-SVN: r255233
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c15
-rw-r--r--gcc/tree-vect-slp.c46
4 files changed, 72 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2b93243..003b6b1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2017-11-29 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83202
+ * tree-vect-slp.c (scalar_stmts_set_t): New typedef.
+ (bst_fail): Use it.
+ (vect_analyze_slp_cost_1): Add visited set, do not account SLP
+ nodes vectorized to the same stmts multiple times.
+ (vect_analyze_slp_cost): Allocate a visited set and pass it down.
+ (vect_analyze_slp_instance): Adjust.
+ (scalar_stmts_to_slp_tree_map_t): New typedef.
+ (vect_schedule_slp_instance): Add a map recording the SLP node
+ representing the vectorized stmts for a set of scalar stmts.
+ Avoid code-generating redundancies.
+ (vect_schedule_slp): Allocate map and pass it down.
+
2017-11-29 Nathan Sidwell <nathan@acm.org>
PR c++/83187
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3b90be1..bf1e374 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-29 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83202
+ * gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.
+
2017-11-29 Nathan Sidwell <nathan@acm.org>
PR c++/83187
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c
new file mode 100644
index 0000000..bdec20f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[4][2])
+{
+ for (int i = 0; i < 4; i++)
+ {
+ data[i][0] = data[i][0] * data[i][0];
+ data[i][1] = data[i][1] * data[i][1];
+ }
+}
+
+/* We should vectorize this with SLP and V2DF vectors. */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index bc81b3d..19f2ac4 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate)
return true;
}
-static hash_set <vec <gimple *>, bst_traits> *bst_fail;
+typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
+static scalar_stmts_set_t *bst_fail;
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
@@ -1674,7 +1675,8 @@ static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vector_for_cost *prologue_cost_vec,
stmt_vector_for_cost *body_cost_vec,
- unsigned ncopies_for_cost)
+ unsigned ncopies_for_cost,
+ scalar_stmts_set_t* visited)
{
unsigned i, j;
slp_tree child;
@@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vec_info stmt_info;
tree lhs;
+ /* If we already costed the exact same set of scalar stmts we're done.
+ We share the generated vector stmts for those. */
+ if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
+ return;
+
+ visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
+
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
- body_cost_vec, ncopies_for_cost);
+ body_cost_vec, ncopies_for_cost, visited);
/* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
prologue_cost_vec.create (10);
body_cost_vec.create (10);
+ scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
&prologue_cost_vec, &body_cost_vec,
- ncopies_for_cost);
+ ncopies_for_cost, visited);
+ delete visited;
/* Record the prologue costs, which were delayed until we were
sure that SLP was successful. */
@@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */
bool *matches = XALLOCAVEC (bool, group_size);
unsigned npermutes = 0;
- bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
+ bst_fail = new scalar_stmts_set_t ();
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, &loads, matches, &npermutes,
NULL, max_tree_size);
@@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
return true;
}
-
+typedef hash_map <vec <gimple *>, slp_tree,
+ simple_hashmap_traits <bst_traits, slp_tree> >
+ scalar_stmts_to_slp_tree_map_t;
/* Vectorize SLP instance tree in postorder. */
static bool
-vect_schedule_slp_instance (slp_tree node, slp_instance instance)
+vect_schedule_slp_instance (slp_tree node, slp_instance instance,
+ scalar_stmts_to_slp_tree_map_t *bst_map)
{
gimple *stmt;
bool grouped_store, is_store;
@@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance)
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return false;
+ /* See if we have already vectorized the same set of stmts and reuse their
+ vectorized stmts. */
+ slp_tree &leader
+ = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
+ if (leader)
+ {
+ SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
+ return false;
+ }
+
+ leader = node;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_schedule_slp_instance (child, instance);
+ vect_schedule_slp_instance (child, instance, bst_map);
/* Push SLP node def-type to stmts. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
@@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo)
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
/* Schedule the tree of INSTANCE. */
+ scalar_stmts_to_slp_tree_map_t *bst_map
+ = new scalar_stmts_to_slp_tree_map_t ();
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
- instance);
+ instance, bst_map);
+ delete bst_map;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizing stmts using SLP.\n");