aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-layout-20.c33
-rw-r--r--gcc/tree-vect-slp.cc40
2 files changed, 63 insertions, 10 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-20.c b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-20.c
new file mode 100644
index 0000000..ed7816b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-20.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+
+extern int a[][4], b[][4], c[][4], d[4], e[4];
+void f()
+{
+ int t0 = a[0][3];
+ int t1 = a[1][3];
+ int t2 = a[2][3];
+ int t3 = a[3][3];
+ int a0 = 0, a1 = 0, a2 = 0, a3 = 0, b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+ for (int i = 0; i < 400; i += 4)
+ {
+ a0 += b[i][3] * t0;
+ a1 += b[i][2] * t1;
+ a2 += b[i][1] * t2;
+ a3 += b[i][0] * t3;
+ b0 += c[i][3] * t0;
+ b1 += c[i][2] * t1;
+ b2 += c[i][1] * t2;
+ b3 += c[i][0] * t3;
+ }
+ d[0] = a0;
+ d[1] = a1;
+ d[2] = a2;
+ d[3] = a3;
+ e[0] = b0;
+ e[1] = b1;
+ e[2] = b2;
+ e[3] = b3;
+}
+
+/* { dg-final { scan-tree-dump-times "add new stmt: \[^\\n\\r\]* = VEC_PERM_EXPR" 3 "slp1" { target { vect_int_mult && vect_perm } } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 59ec66a..b10f69d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4436,18 +4436,19 @@ change_vec_perm_layout (slp_tree node, lane_permutation_t &perm,
IN_LAYOUT_I has no meaning for other types of node.
- Keeping the node as-is is always valid. If the target doesn't appear to
- support the node as-is then layout 0 has a high and arbitrary cost instead
- of being invalid. On the one hand, this ensures that every node has at
- least one valid layout, avoiding what would otherwise be an awkward
- special case. On the other, it still encourages the pass to change
- an invalid pre-existing layout choice into a valid one. */
+ Keeping the node as-is is always valid. If the target doesn't appear
+ to support the node as-is, but might realistically support other layouts,
+ then layout 0 instead has the cost of a worst-case permutation. On the
+ one hand, this ensures that every node has at least one valid layout,
+ avoiding what would otherwise be an awkward special case. On the other,
+ it still encourages the pass to change an invalid pre-existing layout
+ choice into a valid one. */
int
vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
unsigned int out_layout_i)
{
- const int fallback_cost = 100;
+ const int fallback_cost = 1;
if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
{
@@ -4457,8 +4458,9 @@ vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
/* Check that the child nodes support the chosen layout. Checking
the first child is enough, since any second child would have the
same shape. */
+ auto first_child = SLP_TREE_CHILDREN (node)[0];
if (in_layout_i > 0
- && !is_compatible_layout (SLP_TREE_CHILDREN (node)[0], in_layout_i))
+ && !is_compatible_layout (first_child, in_layout_i))
return -1;
change_vec_perm_layout (node, tmp_perm, in_layout_i, out_layout_i);
@@ -4469,7 +4471,15 @@ vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
if (count < 0)
{
if (in_layout_i == 0 && out_layout_i == 0)
- return fallback_cost;
+ {
+ /* Use the fallback cost if the node could in principle support
+ some nonzero layout for both the inputs and the outputs.
+ Otherwise assume that the node will be rejected later
+ and rebuilt from scalars. */
+ if (SLP_TREE_LANES (node) == SLP_TREE_LANES (first_child))
+ return fallback_cost;
+ return 0;
+ }
return -1;
}
@@ -4498,8 +4508,18 @@ vect_optimize_slp_pass::internal_node_cost (slp_tree node, int in_layout_i,
if (!vect_transform_slp_perm_load_1 (m_vinfo, node, tmp_perm, vNULL,
nullptr, vf, true, false, &n_perms))
{
+ auto rep = SLP_TREE_REPRESENTATIVE (node);
if (out_layout_i == 0)
- return fallback_cost;
+ {
+ /* Use the fallback cost if the load is an N-to-N permutation.
+ Otherwise assume that the node will be rejected later
+ and rebuilt from scalars. */
+ if (STMT_VINFO_GROUPED_ACCESS (rep)
+ && (DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (rep))
+ == SLP_TREE_LANES (node)))
+ return fallback_cost;
+ return 0;
+ }
return -1;
}