aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-1.c38
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-2.c39
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-3.c39
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-4.c42
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-5.c43
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-recurr-6.c39
-rw-r--r--gcc/tree-vect-loop.cc281
-rw-r--r--gcc/tree-vect-slp.cc38
-rw-r--r--gcc/tree-vect-stmts.cc17
-rw-r--r--gcc/tree-vectorizer.h4
13 files changed, 558 insertions, 28 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
index f1302b6..83eaa7a 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
@@ -40,4 +40,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
index bdc8a01..06e9b0a 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
index 0b474c2..91cdc12 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c
new file mode 100644
index 0000000..6eb59fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
+{
+ int t = *c;
+ for (int i = 0; i < 64; ++i)
+ {
+ b[i] = a[i] - t;
+ t = a[i];
+ }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c);
+ for (int i = 1; i < 64; ++i)
+ if (b[i] != a[i] - a[i-1])
+ abort ();
+ if (b[0] != -7)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c
new file mode 100644
index 0000000..97efaaa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
+{
+ int t = *c;
+ for (int i = 0; i < 64; ++i)
+ {
+ b[i] = a[i] - t;
+ t = a[i];
+ }
+}
+
+int a[64];
+short b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c);
+ for (int i = 1; i < 64; ++i)
+ if (b[i] != a[i] - a[i-1])
+ abort ();
+ if (b[0] != -7)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c
new file mode 100644
index 0000000..621a5d8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, signed char * __restrict__ b, int * __restrict__ c)
+{
+ int t = *c;
+ for (int i = 0; i < 64; ++i)
+ {
+ b[i] = a[i] - t;
+ t = a[i];
+ }
+}
+
+int a[64];
+signed char b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c);
+ for (int i = 1; i < 64; ++i)
+ if (b[i] != a[i] - a[i-1])
+ abort ();
+ if (b[0] != -7)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c
new file mode 100644
index 0000000..f6dbc49
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
+{
+ int t1 = *c;
+ int t2 = *c;
+ for (int i = 0; i < 64; i+=2)
+ {
+ b[i] = a[i] - t1;
+ t1 = a[i];
+ b[i+1] = a[i+1] - t2;
+ t2 = a[i+1];
+ }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c);
+ for (int i = 2; i < 64; i+=2)
+ if (b[i] != a[i] - a[i-2]
+ || b[i+1] != a[i+1] - a[i-1])
+ abort ();
+ if (b[0] != -7 || b[1] != -6)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c
new file mode 100644
index 0000000..19c56df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
+{
+ int t1 = *c;
+ int t2 = *c;
+ for (int i = 0; i < 64; i+=2)
+ {
+ b[i] = a[i] - t1;
+ t1 = a[i];
+ b[i+1] = a[i+1] - t2;
+ t2 = a[i+1];
+ }
+}
+
+int a[64];
+short b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c);
+ for (int i = 2; i < 64; i+=2)
+ if (b[i] != a[i] - a[i-2]
+ || b[i+1] != a[i+1] - a[i-1])
+ abort ();
+ if (b[0] != -7 || b[1] != -6)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c
new file mode 100644
index 0000000..e771268
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c, int n)
+{
+ int t = *c;
+ for (int i = 0; i < n; ++i)
+ {
+ b[i] = a[i] - t;
+ t = a[i];
+ }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+ check_vect ();
+ for (int i = 0; i < 64; ++i)
+ {
+ a[i] = i;
+ __asm__ volatile ("" ::: "memory");
+ }
+ int c = 7;
+ foo (a, b, &c, 63);
+ for (int i = 1; i < 63; ++i)
+ if (b[i] != a[i] - a[i-1])
+ abort ();
+ if (b[0] != -7)
+ abort ();
+ return 0;
+}
+
+/* ??? We miss epilogue handling for first order recurrences. */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_fully_masked } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 98a943d..63e8654 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -529,6 +529,45 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
return false;
}
+/* Returns true if Phi is a first-order recurrence. A first-order
+ recurrence is a non-reduction recurrence relation in which the value of
+ the recurrence in the current loop iteration equals a value defined in
+ the previous iteration. */
+
+static bool
+vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
+ gphi *phi)
+{
+ /* Ensure the loop latch definition is from within the loop. */
+ edge latch = loop_latch_edge (loop);
+ tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch);
+ if (TREE_CODE (ldef) != SSA_NAME
+ || SSA_NAME_IS_DEFAULT_DEF (ldef)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef))))
+ return false;
+
+ tree def = gimple_phi_result (phi);
+
+ /* Ensure every use_stmt of the phi node is dominated by the latch
+ definition. */
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
+ if (!is_gimple_debug (USE_STMT (use_p))
+ && (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p)
+ || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef),
+ USE_STMT (use_p))))
+ return false;
+
+ /* First-order recurrence autovectorization needs a shuffle vector. */
+ tree scalar_type = TREE_TYPE (def);
+ tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
+ if (!vectype)
+ return false;
+
+ return true;
+}
+
/* Function vect_analyze_scalar_cycles_1.
Examine the cross iteration def-use cycles of scalar variables
@@ -666,6 +705,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
}
}
}
+ else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
+ STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence;
else
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1810,7 +1851,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
|| STMT_VINFO_LIVE_P (stmt_info))
- && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
/* A scalar-dependence cycle that we don't support. */
return opt_result::failure_at (phi,
"not vectorized:"
@@ -1831,6 +1873,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
&& ! PURE_SLP_STMT (stmt_info))
ok = vectorizable_reduction (loop_vinfo,
stmt_info, NULL, NULL, &cost_vec);
+ else if ((STMT_VINFO_DEF_TYPE (stmt_info)
+ == vect_first_order_recurrence)
+ && ! PURE_SLP_STMT (stmt_info))
+ ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL,
+ &cost_vec);
}
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
@@ -8290,6 +8337,178 @@ vectorizable_phi (vec_info *,
return true;
}
+/* Vectorizes first order recurrences. An overview of the transformation
+ is described below. Suppose we have the following loop.
+
+ int t = 0;
+ for (int i = 0; i < n; ++i)
+ {
+ b[i] = a[i] - t;
+ t = a[i];
+ }
+
+ There is a first-order recurrence on 'a'. For this loop, the scalar IR
+ looks (simplified) like:
+
+ scalar.preheader:
+ init = 0;
+
+ scalar.body:
+ i = PHI <0(scalar.preheader), i+1(scalar.body)>
+ _2 = PHI <init(scalar.preheader), _1(scalar.body)>
+ _1 = a[i]
+ b[i] = _1 - _2
+ if (i < n) goto scalar.body
+
+ In this example, _2 is a recurrence because its value depends on the
+ previous iteration. We vectorize this as (VF = 4)
+
+ vector.preheader:
+ vect_init = vect_cst(..., ..., ..., 0)
+
+ vector.body
+ i = PHI <0(vector.preheader), i+4(vector.body)>
+ vect_1 = PHI <vect_init(vector.preheader), v2(vector.body)>
+ vect_2 = a[i, i+1, i+2, i+3];
+ vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 })
+ b[i, i+1, i+2, i+3] = vect_2 - vect_3
+ if (..) goto vector.body
+
+ In this function, vectorizable_recurr, we code generate both the
+ vector PHI node and the permute since those together compute the
+ vectorized value of the scalar PHI. We do not yet have the
+ backedge value to fill in there nor into the vec_perm. Those
+ are filled in maybe_set_vectorized_backedge_value and
+ vect_schedule_scc.
+
+ TODO: Since the scalar loop does not have a use of the recurrence
+ outside of the loop the natural way to implement peeling via
+ vectorizing the live value doesn't work. For now peeling of loops
+ with a recurrence is not implemented. For SLP the supported cases
+ are restricted to those requiring a single vector recurrence PHI. */
+
+bool
+vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
+{
+ if (!loop_vinfo || !is_a<gphi *> (stmt_info->stmt))
+ return false;
+
+ gphi *phi = as_a<gphi *> (stmt_info->stmt);
+
+ /* So far we only support first-order recurrence auto-vectorization. */
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
+ return false;
+
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ unsigned ncopies;
+ if (slp_node)
+ ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ else
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1;
+ /* We need to be able to make progress with a single vector. */
+ if (maybe_gt (dist * 2, nunits))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "first order recurrence exceeds half of "
+ "a vector\n");
+ return false;
+ }
+
+ /* First-order recurrence autovectorization needs to handle permutation
+ with indices = [nunits-1, nunits, nunits+1, ...]. */
+ vec_perm_builder sel (nunits, 1, 3);
+ for (int i = 0; i < 3; ++i)
+ sel.quick_push (nunits - dist + i);
+ vec_perm_indices indices (sel, 2, nunits);
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
+ indices))
+ return false;
+
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ }
+ /* The recurrence costs the initialization vector and one permute
+ for each copy. */
+ unsigned prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vectorizable_recurr: inside_cost = %d, "
+ "prologue_cost = %d .\n", inside_cost,
+ prologue_cost);
+
+ STMT_VINFO_TYPE (stmt_info) = recurr_info_type;
+ return true;
+ }
+
+ edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ basic_block bb = gimple_bb (phi);
+ tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+ tree vec_init = build_vector_from_val (vectype, preheader);
+ vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL);
+
+ /* Create the vectorized first-order PHI node. */
+ tree vec_dest = vect_get_new_vect_var (vectype,
+ vect_simple_var, "vec_recur_");
+ gphi *new_phi = create_phi_node (vec_dest, bb);
+ add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION);
+
+ /* Insert shuffles for the first-order recurrence autovectorization.
+ result = VEC_PERM <vec_recur, vect_1, index[nunits-1, nunits, ...]>. */
+ tree perm = vect_gen_perm_mask_checked (vectype, indices);
+
+ /* Insert the required permute after the latch definition. The
+ second and later operands are tentative and will be updated when we have
+ vectorized the latch definition. */
+ edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
+ gimple_stmt_iterator gsi2
+ = gsi_for_stmt (SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, le)));
+ gsi_next (&gsi2);
+
+ for (unsigned i = 0; i < ncopies; ++i)
+ {
+ vec_dest = make_ssa_name (vectype);
+ gassign *vperm
+ = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
+ i == 0 ? gimple_phi_result (new_phi) : NULL,
+ NULL, perm);
+ vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2);
+
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (vperm);
+ else
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (vperm);
+ }
+
+ if (!slp_node)
+ *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+ return true;
+}
+
/* Return true if VECTYPE represents a vector that requires lowering
by the vector lowering pass. */
@@ -10242,27 +10461,53 @@ maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
imm_use_iterator iter;
use_operand_p use_p;
FOR_EACH_IMM_USE_FAST (use_p, iter, def)
- if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
- if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
- && (phi_info = loop_vinfo->lookup_stmt (phi))
- && STMT_VINFO_RELEVANT_P (phi_info)
- && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+ {
+ gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p));
+ if (!phi)
+ continue;
+ if (!(gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+ && (phi_info = loop_vinfo->lookup_stmt (phi))
+ && STMT_VINFO_RELEVANT_P (phi_info)))
+ continue;
+ loop_p loop = gimple_bb (phi)->loop_father;
+ edge e = loop_latch_edge (loop);
+ if (PHI_ARG_DEF_FROM_EDGE (phi, e) != def)
+ continue;
+
+ if (VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
&& STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
&& STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
{
- loop_p loop = gimple_bb (phi)->loop_father;
- edge e = loop_latch_edge (loop);
- if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
+ vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+ vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+ gcc_assert (phi_defs.length () == latch_defs.length ());
+ for (unsigned i = 0; i < phi_defs.length (); ++i)
+ add_phi_arg (as_a <gphi *> (phi_defs[i]),
+ gimple_get_lhs (latch_defs[i]), e,
+ gimple_phi_arg_location (phi, e->dest_idx));
+ }
+ else if (STMT_VINFO_DEF_TYPE (phi_info) == vect_first_order_recurrence)
+ {
+ /* For first order recurrences we have to update both uses of
+ the latch definition, the one in the PHI node and the one
+ in the generated VEC_PERM_EXPR. */
+ vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+ vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+ gcc_assert (phi_defs.length () == latch_defs.length ());
+ tree phidef = gimple_assign_rhs1 (phi_defs[0]);
+ gphi *vphi = as_a <gphi *> (SSA_NAME_DEF_STMT (phidef));
+ for (unsigned i = 0; i < phi_defs.length (); ++i)
{
- vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
- vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
- gcc_assert (phi_defs.length () == latch_defs.length ());
- for (unsigned i = 0; i < phi_defs.length (); ++i)
- add_phi_arg (as_a <gphi *> (phi_defs[i]),
- gimple_get_lhs (latch_defs[i]), e,
- gimple_phi_arg_location (phi, e->dest_idx));
+ gassign *perm = as_a <gassign *> (phi_defs[i]);
+ if (i > 0)
+ gimple_assign_set_rhs1 (perm, gimple_get_lhs (latch_defs[i-1]));
+ gimple_assign_set_rhs2 (perm, gimple_get_lhs (latch_defs[i]));
+ update_stmt (perm);
}
+ add_phi_arg (vphi, gimple_get_lhs (latch_defs.last ()), e,
+ gimple_phi_arg_location (phi, e->dest_idx));
}
+ }
}
/* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
@@ -10671,6 +10916,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
&& ! PURE_SLP_STMT (stmt_info))
{
@@ -10696,7 +10942,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence)
&& ! PURE_SLP_STMT (stmt_info))
maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index af27fd5..e54414f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -693,6 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
case vect_reduction_def:
case vect_induction_def:
case vect_nested_cycle:
+ case vect_first_order_recurrence:
break;
default:
@@ -1732,7 +1733,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
}
else if (def_type == vect_reduction_def
|| def_type == vect_double_reduction_def
- || def_type == vect_nested_cycle)
+ || def_type == vect_nested_cycle
+ || def_type == vect_first_order_recurrence)
{
/* Else def types have to match. */
stmt_vec_info other_info;
@@ -1746,7 +1748,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
}
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Reduction initial values are not explicitely represented. */
- if (!nested_in_vect_loop_p (loop, stmt_info))
+ if (def_type != vect_first_order_recurrence
+ && !nested_in_vect_loop_p (loop, stmt_info))
skip_args[loop_preheader_edge (loop)->dest_idx] = true;
/* Reduction chain backedge defs are filled manually.
??? Need a better way to identify a SLP reduction chain PHI.
@@ -9210,11 +9213,34 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
child = SLP_TREE_CHILDREN (phi_node)[dest_idx];
if (!child || SLP_TREE_DEF_TYPE (child) != vect_internal_def)
continue;
+ unsigned n = SLP_TREE_VEC_STMTS (phi_node).length ();
/* Simply fill all args. */
- for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
- add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
- vect_get_slp_vect_def (child, i),
- e, gimple_phi_arg_location (phi, dest_idx));
+ if (STMT_VINFO_DEF_TYPE (SLP_TREE_REPRESENTATIVE (phi_node))
+ != vect_first_order_recurrence)
+ for (unsigned i = 0; i < n; ++i)
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
+ vect_get_slp_vect_def (child, i),
+ e, gimple_phi_arg_location (phi, dest_idx));
+ else
+ {
+ /* Unless it is a first order recurrence which needs
+ args filled in for both the PHI node and the permutes. */
+ gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[0];
+ gimple *rphi = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (perm));
+ add_phi_arg (as_a <gphi *> (rphi),
+ vect_get_slp_vect_def (child, n - 1),
+ e, gimple_phi_arg_location (phi, dest_idx));
+ for (unsigned i = 0; i < n; ++i)
+ {
+ gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[i];
+ if (i > 0)
+ gimple_assign_set_rhs1 (perm,
+ vect_get_slp_vect_def (child, i - 1));
+ gimple_assign_set_rhs2 (perm,
+ vect_get_slp_vect_def (child, i));
+ update_stmt (perm);
+ }
+ }
}
}
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c8d1efc..4e0d75e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11176,6 +11176,7 @@ vect_analyze_stmt (vec_info *vinfo,
break;
case vect_induction_def:
+ case vect_first_order_recurrence:
gcc_assert (!bb_vinfo);
break;
@@ -11234,7 +11235,9 @@ vect_analyze_stmt (vec_info *vinfo,
|| vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
|| vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
- stmt_info, NULL, node));
+ stmt_info, NULL, node)
+ || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
+ stmt_info, NULL, node, cost_vec));
else
{
if (bb_vinfo)
@@ -11404,6 +11407,12 @@ vect_transform_stmt (vec_info *vinfo,
gcc_assert (done);
break;
+ case recurr_info_type:
+ done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
+ stmt_info, &vec_stmt, slp_node, NULL);
+ gcc_assert (done);
+ break;
+
case phi_info_type:
done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
gcc_assert (done);
@@ -11804,6 +11813,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
case vect_nested_cycle:
dump_printf (MSG_NOTE, "nested cycle\n");
break;
+ case vect_first_order_recurrence:
+ dump_printf (MSG_NOTE, "first order recurrence\n");
+ break;
case vect_unknown_def_type:
dump_printf (MSG_NOTE, "unknown\n");
break;
@@ -11852,7 +11864,8 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
|| *dt == vect_induction_def
|| *dt == vect_reduction_def
|| *dt == vect_double_reduction_def
- || *dt == vect_nested_cycle)
+ || *dt == vect_nested_cycle
+ || *dt == vect_first_order_recurrence)
{
*vectype = STMT_VINFO_VECTYPE (def_stmt_info);
gcc_assert (*vectype != NULL_TREE);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4870c75..016961d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -65,6 +65,7 @@ enum vect_def_type {
vect_reduction_def,
vect_double_reduction_def,
vect_nested_cycle,
+ vect_first_order_recurrence,
vect_unknown_def_type
};
@@ -1027,6 +1028,7 @@ enum stmt_vec_info_type {
cycle_phi_info_type,
lc_phi_info_type,
phi_info_type,
+ recurr_info_type,
loop_exit_ctrl_vec_info_type
};
@@ -2331,6 +2333,8 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
gimple **, slp_tree);
extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
stmt_vector_for_cost *);
+extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
+ gimple **, slp_tree, stmt_vector_for_cost *);
extern bool vect_emulated_vector_p (tree);
extern bool vect_can_vectorize_without_simd_p (tree_code);
extern bool vect_can_vectorize_without_simd_p (code_helper);