aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2020-09-15 14:35:40 +0200
committer: Richard Biener <rguenther@suse.de> 2020-09-15 14:41:21 +0200
commit: c9de716a59c873859df3b3e1fbb993200fce5a73 (patch)
tree: dcf2c6218ef64c1050e9df4358fc6603de07b9a4 /gcc
parent: 80297f897758f59071968ddff2a04a8d11481117 (diff)
download: gcc-c9de716a59c873859df3b3e1fbb993200fce5a73.zip
gcc-c9de716a59c873859df3b3e1fbb993200fce5a73.tar.gz
gcc-c9de716a59c873859df3b3e1fbb993200fce5a73.tar.bz2
Allow more BB vectorization
The following allows more BB vectorization by generally building leaves from scalars rather than giving up. Note this is only a first step towards this and, as can be seen with the exception for node splitting, it is generally hard to get this heuristic sound. I've added variants of the bb-slp-48.c testcase to make sure we still try permuting, for example.

2020-09-15 Richard Biener <rguenther@suse.de>

* tree-vect-slp.c (vect_build_slp_tree_2): Also consider building an operand from scalars when building it did not fail fatally but avoid messing with the upcall splitting of groups.

* gcc.dg/vect/bb-slp-48.c: New testcase.
* gcc.dg/vect/bb-slp-7.c: Adjust.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/testsuite/gcc.dg/vect/bb-slp-48.c 55
-rw-r--r-- gcc/testsuite/gcc.dg/vect/bb-slp-7.c 3
-rw-r--r-- gcc/tree-vect-slp.c 70
3 files changed, 98 insertions, 30 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-48.c b/gcc/testsuite/gcc.dg/vect/bb-slp-48.c
new file mode 100644
index 0000000..cd22932
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-48.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2];
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+foo (double x)
+{
+ double tem2;
+ double tem1;
+ double _1;
+ double _2;
+ double _3;
+ double _4;
+
+ __BB(2):
+ _1 = a[0];
+ _2 = x_6(D) * 3.0e+0;
+ tem1_7 = _1 + _2;
+ _3 = x_6(D) + 1.0e+0;
+ _4 = a[1];
+ tem2_8 = _4 + _3;
+ a[0] = tem1_7;
+ a[1] = tem2_8;
+ return;
+}
+
+void __GIMPLE (ssa,startwith ("fix_loops"))
+bar (double x)
+{
+ double tem2;
+ double tem1;
+ double _1;
+ double _2;
+ double _3;
+ double _4;
+
+ __BB(2):
+ _1 = a[0];
+ _2 = x_6(D) * 3.0e+0;
+ tem1_7 = _1 + _2;
+ _3 = x_6(D) + 1.0e+0;
+ _4 = a[1];
+ /* Once with operands swapped. */
+ tem2_8 = _3 + _4;
+ a[0] = tem1_7;
+ a[1] = tem2_8;
+ return;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" } } */
+/* We want to vectorize as { a[0], a[1] } + { x*3, x+1 } and thus
+ elide one add in each function. */
+/* { dg-final { scan-tree-dump-times " \\+ " 4 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
index b8bef8c..f12dc27 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-7.c
@@ -22,6 +22,7 @@ main1 (unsigned int x, unsigned int y)
a2 = *pin++ + 2;
a3 = *pin++ * 31;
+ /* But we can still vectorize the multiplication or the store. */
*pout++ = a0 * x;
*pout++ = a1 * y;
*pout++ = a2 * x;
@@ -46,5 +47,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 1591251..d844fe4 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1444,33 +1444,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
continue;
}
- /* If the SLP build failed fatally and we analyze a basic-block
- simply treat nodes we fail to build as externally defined
- (and thus build vectors from the scalar defs).
- The cost model will reject outright expensive cases.
- ??? This doesn't treat cases where permutation ultimatively
- fails (or we don't try permutation below). Ideally we'd
- even compute a permutation that will end up with the maximum
- SLP tree size... */
- if (is_a <bb_vec_info> (vinfo)
- && !matches[0]
- /* ??? Rejecting patterns this way doesn't work. We'd have to
- do extra work to cancel the pattern so the uses see the
- scalar version. */
- && !is_pattern_stmt_p (stmt_info)
- && !oprnd_info->any_pattern)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Building vector operands from scalars\n");
- this_tree_size++;
- child = vect_create_new_slp_node (oprnd_info->ops);
- children.safe_push (child);
- oprnd_info->ops = vNULL;
- oprnd_info->def_stmts = vNULL;
- continue;
- }
-
/* If the SLP build for operand zero failed and operand zero
and one can be commutated try that for the scalar stmts
that failed the match. */
@@ -1542,11 +1515,50 @@ vect_build_slp_tree_2 (vec_info *vinfo,
children.safe_push (child);
continue;
}
-
+ /* We do not undo the swapping here since it might still be
+ the better order for the second operand in case we build
+ the first one from scalars below. */
++*npermutes;
}
-
fail:
+
+ /* If the SLP build failed and we analyze a basic-block
+ simply treat nodes we fail to build as externally defined
+ (and thus build vectors from the scalar defs).
+ The cost model will reject outright expensive cases.
+ ??? This doesn't treat cases where permutation ultimately
+ fails (or we don't try permutation above). Ideally we'd
+ even compute a permutation that will end up with the maximum
+ SLP tree size... */
+ if (is_a <bb_vec_info> (vinfo)
+ /* ??? Rejecting patterns this way doesn't work. We'd have to
+ do extra work to cancel the pattern so the uses see the
+ scalar version. */
+ && !is_pattern_stmt_p (stmt_info)
+ && !oprnd_info->any_pattern)
+ {
+ /* But if there's a leading vector-sized set of matching stmts,
+ fail here so we can split the group. This matches the condition
+ vect_analyze_slp_instance uses. */
+ /* ??? We might want to split here and combine the results to support
+ multiple vector sizes better. */
+ for (j = 0; j < group_size; ++j)
+ if (!matches[j])
+ break;
+ if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Building vector operands from scalars\n");
+ this_tree_size++;
+ child = vect_create_new_slp_node (oprnd_info->ops);
+ children.safe_push (child);
+ oprnd_info->ops = vNULL;
+ oprnd_info->def_stmts = vNULL;
+ continue;
+ }
+ }
+
gcc_assert (child == NULL);
FOR_EACH_VEC_ELT (children, j, child)
vect_free_slp_tree (child, false);