about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2015-04-28 08:30:44 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2015-04-28 08:30:44 +0000
commit 90dd6e3df86c81bdc1380513c57cce64caf32f72 (patch)
tree bdcf186857724357c1663bf3fe7816d98815ea78
parent fde9b31b61aa281b502e4f49200e3090aa8a4dd1 (diff)
download gcc-90dd6e3df86c81bdc1380513c57cce64caf32f72.zip
gcc-90dd6e3df86c81bdc1380513c57cce64caf32f72.tar.gz
gcc-90dd6e3df86c81bdc1380513c57cce64caf32f72.tar.bz2
re PR fortran/62283 (basic-block vectorization fails)
2015-04-28 Richard Biener <rguenther@suse.de>

 PR tree-optimization/62283
 * tree-vect-slp.c (vect_build_slp_tree): When the SLP build
 fails fatally and we are vectorizing a basic-block simply
 cause the child to be constructed piecewise.
 (vect_analyze_slp_cost_1): Adjust.
 (vect_detect_hybrid_slp_stmts): Likewise.
 (vect_bb_slp_scalar_cost): Likewise.
 (vect_get_constant_vectors): For piecewise constructed
 constants place them after the last def.
 (vect_get_slp_defs): Adjust.
 * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
 externals for basic-block vectorization.

 * gfortran.dg/vect/pr62283-2.f: New testcase.
 * gcc.dg/vect/bb-slp-14.c: Adjust.

From-SVN: r222514
-rw-r--r-- gcc/ChangeLog 15
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.dg/vect/bb-slp-14.c 5
-rw-r--r-- gcc/testsuite/gfortran.dg/vect/pr62283-2.f 13
-rw-r--r-- gcc/tree-vect-slp.c 91
-rw-r--r-- gcc/tree-vect-stmts.c 5
6 files changed, 111 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 070a150..8efe3d8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2015-04-28 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/62283
+ * tree-vect-slp.c (vect_build_slp_tree): When the SLP build
+ fails fatally and we are vectorizing a basic-block simply
+ cause the child to be constructed piecewise.
+ (vect_analyze_slp_cost_1): Adjust.
+ (vect_detect_hybrid_slp_stmts): Likewise.
+ (vect_bb_slp_scalar_cost): Likewise.
+ (vect_get_constant_vectors): For piecewise constructed
+ constants place them after the last def.
+ (vect_get_slp_defs): Adjust.
+ * tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
+ externals for basic-block vectorization.
+
2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR target/63503
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f9cce49..703bc3a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2015-04-28 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/62283
+ * gfortran.dg/vect/pr62283-2.f: New testcase.
+ * gcc.dg/vect/bb-slp-14.c: Adjust.
+
+2015-04-28 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/65851
* g++.dg/torture/pr65851.C: New testcase.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-14.c b/gcc/testsuite/gcc.dg/vect/bb-slp-14.c
index a55c48e..d8ba10b 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-14.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-14.c
@@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y)
int i;
unsigned int a0, a1, a2, a3;
- /* Not consecutive load with permutation - not supported. */
+ /* Not consecutive load with permutation - supported with building up
+ the vector from scalars. */
a0 = in[0] + 23;
a1 = in[1] + 142;
a2 = in[1] + 2;
@@ -47,6 +48,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
/* { dg-final { cleanup-tree-dump "slp2" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/pr62283-2.f b/gcc/testsuite/gfortran.dg/vect/pr62283-2.f
new file mode 100644
index 0000000..b71ac3e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr62283-2.f
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_float }
+! { dg-additional-options "-fdump-tree-slp2-details" }
+ subroutine saxpy(alpha,x,y)
+ real x(4),y(4),alpha
+ y(1)=y(1)+alpha*x(1)
+ y(2)=y(2)+alpha*x(2)
+ y(3)=y(3)+alpha*x(3)
+ y(4)=y(4)+alpha*x(4)
+ end
+! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } }
+! { dg-final { cleanup-tree-dump "slp2" } }
+! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d82df3e..b066763 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
continue;
}
+ /* If the SLP build failed fatally and we analyze a basic-block
+ simply treat nodes we fail to build as externally defined
+ (and thus build vectors from the scalar defs).
+ The cost model will reject outright expensive cases.
+ ??? This doesn't treat cases where permutation ultimately
+ fails (or we don't try permutation below). Ideally we'd
+ even compute a permutation that will end up with the maximum
+ SLP tree size... */
+ if (bb_vinfo
+ && !matches[0]
+ /* ??? Rejecting patterns this way doesn't work. We'd have to
+ do extra work to cancel the pattern so the uses see the
+ scalar version. */
+ && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Building vector operands from scalars\n");
+ oprnd_info->def_stmts = vNULL;
+ vect_free_slp_tree (child);
+ SLP_TREE_CHILDREN (*node).quick_push (NULL);
+ continue;
+ }
+
/* If the SLP build for operand zero failed and operand zero
and one can be commutated try that for the scalar stmts
that failed the match. */
@@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
- instance, child, prologue_cost_vec,
- ncopies_for_cost);
+ if (child)
+ vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+ instance, child, prologue_cost_vec,
+ ncopies_for_cost);
/* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
STMT_SLP_TYPE (stmt_vinfo) = hybrid;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
- vect_detect_hybrid_slp_stmts (child, i, stype);
+ if (child)
+ vect_detect_hybrid_slp_stmts (child, i, stype);
}
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
@@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb,
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
- scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
+ if (child)
+ scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
return scalar_cost;
}
@@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
number_of_places_left_in_vector = nunits;
elts = XALLOCAVEC (tree, nunits);
+ bool place_after_defs = false;
for (j = 0; j < number_of_copies; j++)
{
for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
@@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
+ tree orig_op = op;
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
{
if (CONSTANT_CLASS_P (op))
@@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
elts[number_of_places_left_in_vector] = op;
if (!CONSTANT_CLASS_P (op))
constant_p = false;
+ if (TREE_CODE (orig_op) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
+ && STMT_VINFO_BB_VINFO (stmt_vinfo)
+ && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
+ == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
+ place_after_defs = true;
if (number_of_places_left_in_vector == 0)
{
@@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
vec_cst = build_constructor (vector_type, v);
}
- voprnds.quick_push (vect_init_vector (stmt, vec_cst,
- vector_type, NULL));
+ tree init;
+ gimple_stmt_iterator gsi;
+ if (place_after_defs)
+ {
+ gsi = gsi_for_stmt
+ (vect_find_last_scalar_stmt_in_slp (slp_node));
+ init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
+ }
+ else
+ init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
if (ctor_seq != NULL)
{
- gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
- gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
+ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
gsi_insert_seq_before_without_update (&gsi, ctor_seq,
GSI_SAME_STMT);
ctor_seq = NULL;
}
+ voprnds.quick_push (init);
+ place_after_defs = false;
}
}
}
@@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */
- gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
- gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
-
- if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
- || (related
- && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
+ if (child)
{
- /* The number of vector defs is determined by the number of
- vector statements in the node from which we get those
- statements. */
- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
- vectorized_defs = true;
- child_index++;
+ gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
+ gimple related
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
+
+ if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
+ || (related
+ && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
+ {
+ /* The number of vector defs is determined by the number of
+ vector statements in the node from which we get those
+ statements. */
+ number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
+ vectorized_defs = true;
+ child_index++;
+ }
}
+ else
+ child_index++;
}
if (!vectorized_defs)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 4496293..2ce6d4d 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
else
{
stmt_vinfo = vinfo_for_stmt (*def_stmt);
- *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
+ if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
+ *dt = vect_external_def;
+ else
+ *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
}
if (dump_enabled_p ())