-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-mul-1.c     51
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-mul-2.c     51
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-neg-1.c     51
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-neg-2.c     44
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-shift-1.c   70
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr103144-shift-2.c   79
-rw-r--r--  gcc/tree-vect-loop-manip.cc                        37
-rw-r--r--  gcc/tree-vect-loop.cc                             678
-rw-r--r--  gcc/tree-vectorizer.h                              15
9 files changed, 1062 insertions, 14 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c b/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c
new file mode 100644
index 0000000..640c34f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-mul-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+
+#define N 10000
+
+void
+__attribute__((noipa))
+foo_mul (int* a, int b)
+{
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b *= 3;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_mul_const (int* a)
+{
+ int b = 1;
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b *= 3;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_mul_peel (int* a, int b)
+{
+ for (int i = 0; i != 39; i++)
+ {
+ a[i] = b;
+ b *= 3;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_mul_peel_const (int* a)
+{
+ int b = 1;
+ for (int i = 0; i != 39; i++)
+ {
+ a[i] = b;
+ b *= 3;
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c b/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c
new file mode 100644
index 0000000..39fdea3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-mul-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+#include <string.h>
+#include "pr103144-mul-1.c"
+
+typedef int v8si __attribute__((vector_size(32)));
+
+void
+avx2_test (void)
+{
+ int* epi32_exp = (int*) malloc (N * sizeof (int));
+ int* epi32_dst = (int*) malloc (N * sizeof (int));
+
+ __builtin_memset (epi32_exp, 0, N * sizeof (int));
+ int b = 8;
+ v8si init = __extension__(v8si) { b, b * 3, b * 9, b * 27, b * 81, b * 243, b * 729, b * 2187 };
+
+ for (int i = 0; i != N / 8; i++)
+ {
+ memcpy (epi32_exp + i * 8, &init, 32);
+ init *= 6561;
+ }
+
+ foo_mul (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_mul_peel (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * 4) != 0)
+ __builtin_abort ();
+
+ init = __extension__(v8si) { 1, 3, 9, 27, 81, 243, 729, 2187 };
+ for (int i = 0; i != N / 8; i++)
+ {
+ memcpy (epi32_exp + i * 8, &init, 32);
+ init *= 6561;
+ }
+
+ foo_mul_const (epi32_dst);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_mul_peel_const (epi32_dst);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * 4) != 0)
+ __builtin_abort ();
+
+ return;
+}
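
The run test above encodes the lane pattern the vectorizer is expected to produce for foo_mul: with an 8-lane vector the initial vector is { b, 3*b, ..., 3^7*b } and every vector iteration multiplies all lanes by 3^8 = 6561. The following standalone C sketch spells out that expansion with GNU vector extensions; it is illustrative only, not part of the patch, and the names emulated_foo_mul and v8si_sketch are hypothetical.

  /* Illustrative sketch: scalar emulation of the vectorized form of
     foo_mul, assuming a vectorization factor of 8.  */
  typedef int v8si_sketch __attribute__ ((vector_size (32)));

  void
  emulated_foo_mul (int *a, int b, int n)  /* n must be a multiple of 8.  */
  {
    /* vec_init = { b, b*3, b*3^2, ..., b*3^7 }.  */
    v8si_sketch iv = { b, b * 3, b * 9, b * 27, b * 81, b * 243, b * 729, b * 2187 };
    for (int i = 0; i < n; i += 8)
      {
        __builtin_memcpy (a + i, &iv, sizeof (iv));
        iv *= 6561;  /* vec_step = 3^8; the real transform performs this
                        multiply in an unsigned vector type so that lane
                        overflow wraps instead of being undefined.  */
      }
  }
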
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-neg-1.c b/gcc/testsuite/gcc.target/i386/pr103144-neg-1.c
new file mode 100644
index 0000000..f87b1d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-neg-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+
+#define N 10000
+
+void
+__attribute__((noipa))
+foo_neg (int* a, int b)
+{
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b = -b;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_neg_const (int* a)
+{
+ int b = 1;
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b = -b;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_neg_peel (int* a, int b, int n)
+{
+ for (int i = 0; i != n; i++)
+ {
+ a[i] = b;
+ b = -b;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_neg_const_peel (int* a, int n)
+{
+ int b = 1;
+ for (int i = 0; i != n; i++)
+ {
+ a[i] = b;
+ b = -b;
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-neg-2.c b/gcc/testsuite/gcc.target/i386/pr103144-neg-2.c
new file mode 100644
index 0000000..bb8c22b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-neg-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+#include <string.h>
+#include "pr103144-neg-1.c"
+
+void
+avx2_test (void)
+{
+ int* epi32_exp = (int*) malloc (N * sizeof (int));
+ int* epi32_dst = (int*) malloc (N * sizeof (int));
+ long long* epi64_exp = (long long*) malloc (N * sizeof (int));
+
+ __builtin_memset (epi32_exp, 0, N * sizeof (int));
+ int b = 100;
+
+ for (int i = 0; i != N / 2; i++)
+ epi64_exp[i] = ((long long) b) | (((long long) -b) << 32);
+
+ memcpy (epi32_exp, epi64_exp, N * sizeof (int));
+ foo_neg (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_neg_peel (epi32_dst, b, 39);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ for (int i = 0; i != N / 2; i++)
+ epi64_exp[i] = ((long long) 1) | (((long long) -1) << 32);
+
+ memcpy (epi32_exp, epi64_exp, N * sizeof (int));
+ foo_neg_const (epi32_dst);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_neg_const_peel (epi32_dst, 39);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ return;
+}
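
The expected output built above is simply the alternating pattern { b, -b, b, -b, ... }. Because the vectorization factor is even, each lane is negated an even number of times per vector iteration, so the vectorized IV needs no in-loop update at all; only the initial vector matters. A minimal sketch of the emulated vector form, assuming an 8-lane vector (illustrative only, not part of the patch; emulated_foo_neg and v8si_sketch are hypothetical names):

  /* Illustrative sketch: scalar emulation of the vectorized form of
     foo_neg, assuming a vectorization factor of 8.  */
  typedef int v8si_sketch __attribute__ ((vector_size (32)));

  void
  emulated_foo_neg (int *a, int b, int n)  /* n must be a multiple of 8.  */
  {
    /* vec_init = { b, -b, b, -b, ... }; with an even VF the pattern repeats
       unchanged, so the loop body contains no vector step update.  */
    v8si_sketch iv = { b, -b, b, -b, b, -b, b, -b };
    for (int i = 0; i < n; i += 8)
      __builtin_memcpy (a + i, &iv, sizeof (iv));
  }
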
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-shift-1.c b/gcc/testsuite/gcc.target/i386/pr103144-shift-1.c
new file mode 100644
index 0000000..2a69203
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-shift-1.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -fdump-tree-vect-details -mprefer-vector-width=256" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 6 "vect" } } */
+
+#define N 10000
+void
+__attribute__((noipa))
+foo_shl (int* a, int b)
+{
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b <<= 1;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_ashr (int* a, int b)
+{
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b >>= 1;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_lshr (unsigned int* a, unsigned int b)
+{
+ for (int i = 0; i != N; i++)
+ {
+ a[i] = b;
+ b >>= 1U;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_shl_peel (int* a, int b)
+{
+ for (int i = 0; i != 39; i++)
+ {
+ a[i] = b;
+ b <<= 1;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_ashr_peel (int* a, int b)
+{
+ for (int i = 0; i != 39; i++)
+ {
+ a[i] = b;
+ b >>= 1;
+ }
+}
+
+void
+__attribute__((noipa))
+foo_lshr_peel (unsigned int* a, unsigned int b)
+{
+ for (int i = 0; i != 39; i++)
+ {
+ a[i] = b;
+ b >>= 1U;
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103144-shift-2.c b/gcc/testsuite/gcc.target/i386/pr103144-shift-2.c
new file mode 100644
index 0000000..6f47719
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103144-shift-2.c
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+#include <string.h>
+#include "pr103144-shift-1.c"
+
+typedef int v8si __attribute__((vector_size(32)));
+typedef unsigned int v8usi __attribute__((vector_size(32)));
+
+void
+avx2_test (void)
+{
+ int* epi32_exp = (int*) malloc (N * sizeof (int));
+ int* epi32_dst = (int*) malloc (N * sizeof (int));
+ unsigned int* epu32_exp = (unsigned int*) malloc (N * sizeof (int));
+ unsigned int* epu32_dst = (unsigned int*) malloc (N * sizeof (int));
+
+ __builtin_memset (epi32_exp, 0, N * sizeof (int));
+ int b = 8;
+ v8si init = __extension__(v8si) { b, b << 1, b << 2, b << 3, b << 4, b << 5, b << 6, b << 7 };
+
+ for (int i = 0; i != N / 8; i++)
+ {
+ memcpy (epi32_exp + i * 8, &init, 32);
+ init <<= 8;
+ }
+
+ foo_shl (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_shl_peel (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ b = -11111;
+ init = __extension__(v8si) { b, b >> 1, b >> 2, b >> 3, b >> 4, b >> 5, b >> 6, b >> 7 };
+ for (int i = 0; i != N / 8; i++)
+ {
+ memcpy (epi32_exp + i * 8, &init, 32);
+ init >>= 8;
+ }
+
+ foo_ashr (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_ashr_peel (epi32_dst, b);
+ if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * sizeof (int)) != 0)
+ {
+ for (int i = 0; i != 39; i++)
+ {
+ printf ("epi32_dst[%d] is %d ----", i, epi32_dst[i]);
+ printf ("epi32_exp[%d] is %d\n", i, epi32_exp[i]);
+ }
+ __builtin_abort ();
+ }
+
+ __builtin_memset (epu32_exp, 0, N * sizeof (int));
+ unsigned int c = 11111111;
+ v8usi initu = __extension__(v8usi) { c, c >> 1U, c >> 2U, c >> 3U, c >> 4U, c >> 5U, c >> 6U, c >> 7U };
+ for (int i = 0; i != N / 8; i++)
+ {
+ memcpy (epu32_exp + i * 8, &initu, 32);
+ initu >>= 8U;
+ }
+
+ foo_lshr (epu32_dst, c);
+ if (__builtin_memcmp (epu32_dst, epu32_exp, N * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ foo_lshr_peel (epu32_dst, c);
+ if (__builtin_memcmp (epu32_dst, epu32_exp, 39 * sizeof (int)) != 0)
+ __builtin_abort ();
+
+ return;
+}
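
For the shift tests the expected lane pattern is { b, b >> 1, ..., b >> 7 } (or << for foo_shl), and every vector iteration shifts all lanes by VF * step = 8. This is also why the vectorizer later refuses the transform when nunits * step reaches the type precision: a lane would then be shifted by the full bit width or more, which is undefined. A minimal sketch for foo_ashr, assuming an 8-lane vector (illustrative only, not part of the patch; emulated_foo_ashr and v8si_sketch are hypothetical names):

  /* Illustrative sketch: scalar emulation of the vectorized form of
     foo_ashr, assuming a vectorization factor of 8 (8 * 1 < 32 bits).  */
  typedef int v8si_sketch __attribute__ ((vector_size (32)));

  void
  emulated_foo_ashr (int *a, int b, int n)  /* n must be a multiple of 8.  */
  {
    v8si_sketch iv = { b, b >> 1, b >> 2, b >> 3, b >> 4, b >> 5, b >> 6, b >> 7 };
    for (int i = 0; i < n; i += 8)
      {
        __builtin_memcpy (a + i, &iv, sizeof (iv));
        iv >>= 8;  /* vec_step = VF * scalar step = 8, well below the precision.  */
      }
  }
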
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index b68e6cd..74b221a 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1560,15 +1560,28 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
gcc_assert (!tree_is_chrec (step_expr));
init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
+ gimple_seq stmts = NULL;
+ enum vect_induction_op_type induction_type
+ = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
- off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- fold_convert (TREE_TYPE (step_expr), niters),
- step_expr);
- if (POINTER_TYPE_P (type))
- ni = fold_build_pointer_plus (init_expr, off);
+ if (induction_type == vect_step_op_add)
+ {
+ off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
+ fold_convert (TREE_TYPE (step_expr), niters),
+ step_expr);
+ if (POINTER_TYPE_P (type))
+ ni = fold_build_pointer_plus (init_expr, off);
+ else
+ ni = fold_build2 (PLUS_EXPR, type,
+ init_expr, fold_convert (type, off));
+ }
+ /* Don't bother calling vect_peel_nonlinear_iv_init. */
+ else if (induction_type == vect_step_op_neg)
+ ni = init_expr;
else
- ni = fold_build2 (PLUS_EXPR, type,
- init_expr, fold_convert (type, off));
+ ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
+ niters, step_expr,
+ induction_type);
var = create_tmp_var (type, "tmp");
@@ -1577,9 +1590,15 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
ni_name = force_gimple_operand (ni, &new_stmts, false, var);
/* Exit_bb shouldn't be empty. */
if (!gsi_end_p (last_gsi))
- gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
+ {
+ gsi_insert_seq_after (&last_gsi, stmts, GSI_SAME_STMT);
+ gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
+ }
else
- gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
+ {
+ gsi_insert_seq_before (&last_gsi, stmts, GSI_SAME_STMT);
+ gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
+ }
/* Fix phi expressions in the successor bb. */
adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
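
The hunk above makes vect_update_ivs_after_vectorizer handle nonlinear IVs by delegating to vect_peel_nonlinear_iv_init (and by leaving neg IVs alone, since the vectorized main loop always executes an even number of scalar iterations). The scalar semantics of that peeling, sketched as plain C for a 32-bit signed int IV, are shown below; peel_init_sketch is a hypothetical helper, the real function emits GIMPLE instead.

  /* Illustrative sketch of what vect_peel_nonlinear_iv_init computes for a
     32-bit signed int IV after NITERS peeled scalar iterations.  */
  enum step_op_sketch { OP_NEG, OP_MUL, OP_SHL, OP_SHR };

  int
  peel_init_sketch (int init, unsigned niters, int step, enum step_op_sketch op)
  {
    unsigned long long shift = (unsigned long long) step * niters;
    switch (op)
      {
      case OP_NEG:   /* Negate only when the peel count is odd.  */
        return (niters & 1) ? -init : init;
      case OP_SHL:   /* Shifting out everything yields 0.  */
        return shift >= 32 ? 0 : (int) ((unsigned) init << shift);
      case OP_SHR:   /* A signed IV saturates at init >> (prec - 1).  */
        return shift >= 32 ? init >> 31 : init >> shift;
      case OP_MUL:
        {
          unsigned m = 1;   /* step^niters, computed in unsigned arithmetic
                               so overflow wraps instead of being undefined.  */
          for (unsigned i = 0; i < niters; i++)
            m *= (unsigned) step;
          return (int) ((unsigned) init * m);
        }
      }
    return init;
  }
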
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 24556b5..8f88f17 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -425,6 +425,77 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
return true;
}
+/* Function vect_is_nonlinear_iv_evolution
+
+ Only support nonlinear induction for integer types:
+ 1. neg
+ 2. mul by constant
+ 3. lshift/rshift by constant.
+
+ For neg induction, return a fake step as integer -1. */
+static bool
+vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info,
+ gphi* loop_phi_node, tree *init, tree *step)
+{
+ tree init_expr, ev_expr, result, op1, op2;
+ gimple* def;
+
+ if (gimple_phi_num_args (loop_phi_node) != 2)
+ return false;
+
+ init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop));
+ ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop));
+
+ /* Support nonlinear induction only for integer type. */
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr)))
+ return false;
+
+ *init = init_expr;
+ result = PHI_RESULT (loop_phi_node);
+
+ if (TREE_CODE (ev_expr) != SSA_NAME
+ || ((def = SSA_NAME_DEF_STMT (ev_expr)), false)
+ || !is_gimple_assign (def))
+ return false;
+
+ enum tree_code t_code = gimple_assign_rhs_code (def);
+ switch (t_code)
+ {
+ case NEGATE_EXPR:
+ if (gimple_assign_rhs1 (def) != result)
+ return false;
+ *step = build_int_cst (TREE_TYPE (init_expr), -1);
+ STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg;
+ break;
+
+ case RSHIFT_EXPR:
+ case LSHIFT_EXPR:
+ case MULT_EXPR:
+ op1 = gimple_assign_rhs1 (def);
+ op2 = gimple_assign_rhs2 (def);
+ if (TREE_CODE (op2) != INTEGER_CST
+ || op1 != result)
+ return false;
+ *step = op2;
+ if (t_code == LSHIFT_EXPR)
+ STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl;
+ else if (t_code == RSHIFT_EXPR)
+ STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shr;
+ /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul. */
+ else
+ STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_mul;
+ break;
+
+ default:
+ return false;
+ }
+
+ STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init;
+ STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step;
+
+ return true;
+}
+
/* Return true if PHI, described by STMT_INFO, is the inner PHI in
what we are assuming is a double reduction. For example, given
a structure like this:
@@ -513,11 +584,16 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
= evolution_part_in_loop_num (access_fn, loop->num);
}
- if (!access_fn
- || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
- || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
- || (LOOP_VINFO_LOOP (loop_vinfo) != loop
- && TREE_CODE (step) != INTEGER_CST))
+ if ((!access_fn
+ || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi)
+ || !vect_is_simple_iv_evolution (loop->num, access_fn,
+ &init, &step)
+ || (LOOP_VINFO_LOOP (loop_vinfo) != loop
+ && TREE_CODE (step) != INTEGER_CST))
+ /* Only handle nonlinear iv for the same loop. */
+ && (LOOP_VINFO_LOOP (loop_vinfo) != loop
+ || !vect_is_nonlinear_iv_evolution (loop, stmt_vinfo,
+ phi, &init, &step)))
{
worklist.safe_push (stmt_vinfo);
continue;
@@ -8233,6 +8309,591 @@ vect_can_vectorize_without_simd_p (code_helper code)
&& vect_can_vectorize_without_simd_p (tree_code (code)));
}
+/* Create vector init for vectorized iv. */
+static tree
+vect_create_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
+ tree step_expr, poly_uint64 nunits,
+ tree vectype,
+ enum vect_induction_op_type induction_type)
+{
+ unsigned HOST_WIDE_INT const_nunits;
+ tree vec_shift, vec_init, new_name;
+ unsigned i;
+ tree itype = TREE_TYPE (vectype);
+
+ /* iv_loop is the loop to be vectorized. Create:
+ vec_init = [X, op(X,S), op(op(X,S),S), ...] (S = step_expr,
+ X = init_expr, op = the nonlinear step operation). */
+ new_name = gimple_convert (stmts, itype, init_expr);
+ switch (induction_type)
+ {
+ case vect_step_op_shr:
+ case vect_step_op_shl:
+ /* Build the initial vector by shifting the broadcast init value
+ by the series [0, S, 2*S, ...]. */
+ vec_init = gimple_build_vector_from_val (stmts,
+ vectype,
+ new_name);
+ vec_shift = gimple_build (stmts, VEC_SERIES_EXPR, vectype,
+ build_zero_cst (itype), step_expr);
+ vec_init = gimple_build (stmts,
+ (induction_type == vect_step_op_shr
+ ? RSHIFT_EXPR : LSHIFT_EXPR),
+ vectype, vec_init, vec_shift);
+ break;
+
+ case vect_step_op_neg:
+ {
+ vec_init = gimple_build_vector_from_val (stmts,
+ vectype,
+ new_name);
+ tree vec_neg = gimple_build (stmts, NEGATE_EXPR,
+ vectype, vec_init);
+ /* The encoding has 2 interleaved stepped patterns. */
+ vec_perm_builder sel (nunits, 2, 3);
+ sel.quick_grow (6);
+ for (i = 0; i < 3; i++)
+ {
+ sel[2 * i] = i;
+ sel[2 * i + 1] = i + nunits;
+ }
+ vec_perm_indices indices (sel, 2, nunits);
+ tree perm_mask_even
+ = vect_gen_perm_mask_checked (vectype, indices);
+ vec_init = gimple_build (stmts, VEC_PERM_EXPR,
+ vectype,
+ vec_init, vec_neg,
+ perm_mask_even);
+ }
+ break;
+
+ case vect_step_op_mul:
+ {
+ /* Use an unsigned multiply so that integer overflow wraps
+ instead of being undefined. */
+ gcc_assert (nunits.is_constant (&const_nunits));
+ tree utype = unsigned_type_for (itype);
+ tree uvectype = build_vector_type (utype,
+ TYPE_VECTOR_SUBPARTS (vectype));
+ new_name = gimple_convert (stmts, utype, new_name);
+ vec_init = gimple_build_vector_from_val (stmts,
+ uvectype,
+ new_name);
+ tree_vector_builder elts (uvectype, const_nunits, 1);
+ tree elt_step = build_one_cst (utype);
+
+ elts.quick_push (elt_step);
+ for (i = 1; i < const_nunits; i++)
+ {
+ /* Create: elt_step_i = elt_step_{i-1} * step_expr. */
+ elt_step = gimple_build (stmts, MULT_EXPR,
+ utype, elt_step, step_expr);
+ elts.quick_push (elt_step);
+ }
+ /* Create a vector from [elt_step_0, elt_step_1, ...,
+ elt_step_nunits-1]. */
+ tree vec_mul = gimple_build_vector (stmts, &elts);
+ vec_init = gimple_build (stmts, MULT_EXPR, uvectype,
+ vec_init, vec_mul);
+ vec_init = gimple_convert (stmts, vectype, vec_init);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return vec_init;
+}
+
+/* Peel INIT_EXPR by SKIP_NITERS iterations for INDUCTION_TYPE. */
+tree
+vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
+ tree skip_niters, tree step_expr,
+ enum vect_induction_op_type induction_type)
+{
+ gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST);
+ tree type = TREE_TYPE (init_expr);
+ unsigned prec = TYPE_PRECISION (type);
+ switch (induction_type)
+ {
+ case vect_step_op_neg:
+ if (TREE_INT_CST_LOW (skip_niters) % 2)
+ init_expr = gimple_build (stmts, NEGATE_EXPR, type, init_expr);
+ /* else no change. */
+ break;
+
+ case vect_step_op_shr:
+ case vect_step_op_shl:
+ skip_niters = gimple_convert (stmts, type, skip_niters);
+ step_expr = gimple_build (stmts, MULT_EXPR, type, step_expr, skip_niters);
+ /* When the shift amount is >= precision, we need to avoid undefined
+ behavior. In the original loop there is no UB; semantically,
+ init_expr becomes 0 for lshr and shl, and init_expr >> (prec - 1)
+ for ashr. */
+ if (!tree_fits_uhwi_p (step_expr)
+ || tree_to_uhwi (step_expr) >= prec)
+ {
+ if (induction_type == vect_step_op_shl
+ || TYPE_UNSIGNED (type))
+ init_expr = build_zero_cst (type);
+ else
+ init_expr = gimple_build (stmts, RSHIFT_EXPR, type,
+ init_expr,
+ wide_int_to_tree (type, prec - 1));
+ }
+ else
+ init_expr = gimple_build (stmts, (induction_type == vect_step_op_shr
+ ? RSHIFT_EXPR : LSHIFT_EXPR),
+ type, init_expr, step_expr);
+ break;
+
+ case vect_step_op_mul:
+ {
+ tree utype = unsigned_type_for (type);
+ init_expr = gimple_convert (stmts, utype, init_expr);
+ unsigned skipn = TREE_INT_CST_LOW (skip_niters);
+ wide_int begin = wi::to_wide (step_expr);
+ for (unsigned i = 0; i != skipn - 1; i++)
+ begin = wi::mul (begin, wi::to_wide (step_expr));
+ tree mult_expr = wide_int_to_tree (utype, begin);
+ init_expr = gimple_build (stmts, MULT_EXPR, utype, init_expr, mult_expr);
+ init_expr = gimple_convert (stmts, type, init_expr);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return init_expr;
+}
+
+/* Create vector step for vectorized iv. */
+static tree
+vect_create_nonlinear_iv_step (gimple_seq* stmts, tree step_expr,
+ poly_uint64 vf,
+ enum vect_induction_op_type induction_type)
+{
+ tree expr = build_int_cst (TREE_TYPE (step_expr), vf);
+ tree new_name = NULL;
+ /* Step should be pow (step, vf) for mult induction. */
+ if (induction_type == vect_step_op_mul)
+ {
+ gcc_assert (vf.is_constant ());
+ wide_int begin = wi::to_wide (step_expr);
+
+ for (unsigned i = 0; i != vf.to_constant () - 1; i++)
+ begin = wi::mul (begin, wi::to_wide (step_expr));
+
+ new_name = wide_int_to_tree (TREE_TYPE (step_expr), begin);
+ }
+ else if (induction_type == vect_step_op_neg)
+ /* Do nothing. */
+ ;
+ else
+ new_name = gimple_build (stmts, MULT_EXPR, TREE_TYPE (step_expr),
+ expr, step_expr);
+ return new_name;
+}
+
+static tree
+vect_create_nonlinear_iv_vec_step (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
+ tree new_name, tree vectype,
+ enum vect_induction_op_type induction_type)
+{
+ /* No step is needed for neg induction. */
+ if (induction_type == vect_step_op_neg)
+ return NULL;
+
+ tree t = unshare_expr (new_name);
+ gcc_assert (CONSTANT_CLASS_P (new_name)
+ || TREE_CODE (new_name) == SSA_NAME);
+ tree new_vec = build_vector_from_val (vectype, t);
+ tree vec_step = vect_init_vector (loop_vinfo, stmt_info,
+ new_vec, vectype, NULL);
+ return vec_step;
+}
+
+/* Update the vectorized iv with vec_step; induc_def holds the current
+ value of the vector iv. */
+static tree
+vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype,
+ tree induc_def, tree vec_step,
+ enum vect_induction_op_type induction_type)
+{
+ tree vec_def = induc_def;
+ switch (induction_type)
+ {
+ case vect_step_op_mul:
+ {
+ /* Use an unsigned multiply so that integer overflow wraps
+ instead of being undefined. */
+ tree uvectype
+ = build_vector_type (unsigned_type_for (TREE_TYPE (vectype)),
+ TYPE_VECTOR_SUBPARTS (vectype));
+ vec_def = gimple_convert (stmts, uvectype, vec_def);
+ vec_step = gimple_convert (stmts, uvectype, vec_step);
+ vec_def = gimple_build (stmts, MULT_EXPR, uvectype,
+ vec_def, vec_step);
+ vec_def = gimple_convert (stmts, vectype, vec_def);
+ }
+ break;
+
+ case vect_step_op_shr:
+ vec_def = gimple_build (stmts, RSHIFT_EXPR, vectype,
+ vec_def, vec_step);
+ break;
+
+ case vect_step_op_shl:
+ vec_def = gimple_build (stmts, LSHIFT_EXPR, vectype,
+ vec_def, vec_step);
+ break;
+ case vect_step_op_neg:
+ vec_def = induc_def;
+ /* Do nothing. */
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return vec_def;
+}
+
+/* Function vectorizable_nonlinear_induction
+
+ Check if STMT_INFO performs a nonlinear induction computation that can be
+ vectorized. If VEC_STMT is also passed, vectorize the induction PHI: create
+ a vectorized phi to replace it, put it in VEC_STMT, and add it to the same
+ basic block.
+ Return true if STMT_INFO is vectorizable in this way. */
+
+static bool
+vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
+ stmt_vec_info stmt_info,
+ gimple **vec_stmt, slp_tree slp_node,
+ stmt_vector_for_cost *cost_vec)
+{
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ unsigned ncopies;
+ bool nested_in_vect_loop = false;
+ class loop *iv_loop;
+ tree vec_def;
+ edge pe = loop_preheader_edge (loop);
+ basic_block new_bb;
+ tree vec_init, vec_step;
+ tree new_name;
+ gimple *new_stmt;
+ gphi *induction_phi;
+ tree induc_def, vec_dest;
+ tree init_expr, step_expr;
+ tree niters_skip;
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned i;
+ gimple_stmt_iterator si;
+
+ gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
+
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ enum vect_induction_op_type induction_type
+ = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
+
+ gcc_assert (induction_type > vect_step_op_add);
+
+ if (slp_node)
+ ncopies = 1;
+ else
+ ncopies = vect_get_num_copies (loop_vinfo, vectype);
+ gcc_assert (ncopies >= 1);
+
+ /* FORNOW. Only handle nonlinear induction in the same loop. */
+ if (nested_in_vect_loop_p (loop, stmt_info))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "nonlinear induction in nested loop.\n");
+ return false;
+ }
+
+ iv_loop = loop;
+ gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
+
+ /* TODO: Support slp for nonlinear iv. There should be separate vector iv
+ update for each iv and a permutation to generate wanted vector iv. */
+ if (slp_node)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "SLP induction not supported for nonlinear"
+ " induction.\n");
+ return false;
+ }
+
+ /* Init_expr will be updated by vect_update_ivs_after_vectorizer,
+ if niters is unknown:
+ For shift, when the shift amount is >= precision, there would be UB.
+ For mult, we don't know how to generate
+ init_expr * pow (step, niters) for variable niters.
+ For neg, it should be ok, since the niters of the vectorized main loop
+ will always be a multiple of 2. */
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && induction_type != vect_step_op_neg)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Peeling for epilogue is not supported"
+ " for nonlinear induction except neg"
+ " when iteration count is unknown.\n");
+ return false;
+ }
+
+ /* Also don't support peeling for neg when niters is variable.
+ ??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
+ niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ if (niters_skip != NULL_TREE
+ && TREE_CODE (niters_skip) != INTEGER_CST)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Peeling for alignment is not supported"
+ " for nonlinear induction when niters_skip"
+ " is not constant.\n");
+ return false;
+ }
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && induction_type == vect_step_op_mul)
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "floating point nonlinear induction vectorization"
+ " not supported.\n");
+ return false;
+ }
+
+ step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
+ init_expr = vect_phi_initial_value (phi);
+ gcc_assert (step_expr != NULL_TREE && init_expr != NULL
+ && TREE_CODE (step_expr) == INTEGER_CST);
+ /* step_expr should have the same type as init_expr,
+ e.g. for uint64 a >> 1 the step is int, but a vector<uint64> shift is used. */
+ step_expr = fold_convert (TREE_TYPE (vectype), step_expr);
+
+ if (TREE_CODE (init_expr) == INTEGER_CST)
+ init_expr = fold_convert (TREE_TYPE (vectype), init_expr);
+ else
+ gcc_assert (tree_nop_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (init_expr)));
+
+ switch (induction_type)
+ {
+ case vect_step_op_neg:
+ if (TREE_CODE (init_expr) != INTEGER_CST
+ && TREE_CODE (init_expr) != REAL_CST)
+ {
+ /* Check for backend support of NEGATE_EXPR and vec_perm. */
+ if (!directly_supported_p (NEGATE_EXPR, vectype))
+ return false;
+
+ /* The encoding has 2 interleaved stepped patterns. */
+ vec_perm_builder sel (nunits, 2, 3);
+ machine_mode mode = TYPE_MODE (vectype);
+ sel.quick_grow (6);
+ for (i = 0; i < 3; i++)
+ {
+ sel[i * 2] = i;
+ sel[i * 2 + 1] = i + nunits;
+ }
+ vec_perm_indices indices (sel, 2, nunits);
+ if (!can_vec_perm_const_p (mode, mode, indices))
+ return false;
+ }
+ break;
+
+ case vect_step_op_mul:
+ {
+ /* Check for backend support of MULT_EXPR. */
+ if (!directly_supported_p (MULT_EXPR, vectype))
+ return false;
+
+ /* ??? How to construct the vector step for variable-length vectors:
+ [ 1, step, pow (step, 2), pow (step, 3), ... ]. */
+ if (!vf.is_constant ())
+ return false;
+ }
+ break;
+
+ case vect_step_op_shr:
+ /* Check for backend support of RSHIFT_EXPR. */
+ if (!directly_supported_p (RSHIFT_EXPR, vectype, optab_vector))
+ return false;
+
+ /* Don't shift more than the type precision, to avoid undefined behavior. */
+ if (!tree_fits_uhwi_p (step_expr)
+ || maybe_ge (nunits * tree_to_uhwi (step_expr),
+ TYPE_PRECISION (TREE_TYPE (init_expr))))
+ return false;
+ break;
+
+ case vect_step_op_shl:
+ /* Check for backend support of LSHIFT_EXPR. */
+ if (!directly_supported_p (LSHIFT_EXPR, vectype, optab_vector))
+ return false;
+
+ /* Don't shift more than the type precision, to avoid undefined behavior. */
+ if (!tree_fits_uhwi_p (step_expr)
+ || maybe_ge (nunits * tree_to_uhwi (step_expr),
+ TYPE_PRECISION (TREE_TYPE (init_expr))))
+ return false;
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!vec_stmt) /* transformation not required. */
+ {
+ unsigned inside_cost = 0, prologue_cost = 0;
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+
+ /* Neg induction doesn't have any inside_cost. */
+ if (induction_type == vect_step_op_neg)
+ inside_cost = 0;
+
+ /* prologue cost for vec_init and vec_step. */
+ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_induction_cost: inside_cost = %d, "
+ "prologue_cost = %d. \n", inside_cost,
+ prologue_cost);
+
+ STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+ DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction");
+ return true;
+ }
+
+ /* Transform. */
+
+ /* Compute a vector variable, initialized with the first VF values of
+ the induction variable. E.g., for an iv with IV_PHI='X', step S and
+ a vector of 4 units, we want to compute:
+ [X, op(X,S), op(op(X,S),S), op(op(op(X,S),S),S)], where op is the
+ nonlinear step operation (neg, mul or shift). */
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
+
+ pe = loop_preheader_edge (iv_loop);
+ /* Find the first insertion point in the BB. */
+ basic_block bb = gimple_bb (phi);
+ si = gsi_after_labels (bb);
+
+ gimple_seq stmts = NULL;
+
+ /* If we are using the loop mask to "peel" for alignment then we need
+ to adjust the start value here. */
+ if (niters_skip != NULL_TREE)
+ init_expr = vect_peel_nonlinear_iv_init (&stmts, init_expr, niters_skip,
+ step_expr, induction_type);
+
+ vec_init = vect_create_nonlinear_iv_init (&stmts, init_expr,
+ step_expr, nunits, vectype,
+ induction_type);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+
+ stmts = NULL;
+ new_name = vect_create_nonlinear_iv_step (&stmts, step_expr,
+ vf, induction_type);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+
+ vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info,
+ new_name, vectype,
+ induction_type);
+ /* Create the following def-use cycle:
+ loop prolog:
+ vec_init = ...
+ vec_step = ...
+ loop:
+ vec_iv = PHI <vec_init, vec_loop>
+ ...
+ STMT
+ ...
+ vec_loop = vec_iv + vec_step; */
+
+ /* Create the induction-phi that defines the induction-operand. */
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
+ induction_phi = create_phi_node (vec_dest, iv_loop->header);
+ induc_def = PHI_RESULT (induction_phi);
+
+ /* Create the iv update inside the loop. */
+ stmts = NULL;
+ vec_def = vect_update_nonlinear_iv (&stmts, vectype,
+ induc_def, vec_step,
+ induction_type);
+
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ new_stmt = SSA_NAME_DEF_STMT (vec_def);
+
+ /* Set the arguments of the phi node: */
+ add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
+
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
+ *vec_stmt = induction_phi;
+
+ /* In case the vectorization factor (VF) is bigger than the number
+ of elements that we can fit in a vectype (nunits), we have to generate
+ more than one vector stmt - i.e. - we need to "unroll" the
+ vector stmt by a factor VF/nunits. For more details see the documentation
+ in vectorizable_operation. */
+
+ if (ncopies > 1)
+ {
+ stmts = NULL;
+ /* FORNOW. This restriction should be relaxed. */
+ gcc_assert (!nested_in_vect_loop);
+
+ new_name = vect_create_nonlinear_iv_step (&stmts, step_expr,
+ nunits, induction_type);
+
+ vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info,
+ new_name, vectype,
+ induction_type);
+ vec_def = induc_def;
+ for (i = 1; i < ncopies; i++)
+ {
+ /* vec_i = vec_prev + vec_step. */
+ stmts = NULL;
+ vec_def = vect_update_nonlinear_iv (&stmts, vectype,
+ vec_def, vec_step,
+ induction_type);
+ gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ new_stmt = SSA_NAME_DEF_STMT (vec_def);
+ STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ }
+ }
+
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "transform induction: created def-use cycle: %G%G",
+ (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def));
+
+ return true;
+}
+
/* Function vectorizable_induction
Check if STMT_INFO performs an induction computation that can be vectorized.
@@ -8263,6 +8924,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
unsigned i;
tree expr;
gimple_stmt_iterator si;
+ enum vect_induction_op_type induction_type
+ = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
if (!phi)
@@ -8275,6 +8938,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
return false;
+ /* Handle nonlinear induction in a separate place. */
+ if (induction_type != vect_step_op_add)
+ return vectorizable_nonlinear_induction (loop_vinfo, stmt_info,
+ vec_stmt, slp_node, cost_vec);
+
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
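
One detail worth calling out from the new vectorizable_nonlinear_induction path: for vect_step_op_mul both the initial-vector construction and the per-iteration update are done in the corresponding unsigned vector type, so that lane overflow wraps instead of being undefined, and the result is converted back to the signed vector type. A minimal sketch of that update step with GNU vector extensions is shown below; it is illustrative only, the real code builds the equivalent GIMPLE, and update_mul_iv_sketch, v8si_sketch and v8usi_sketch are hypothetical names.

  /* Illustrative sketch of the unsigned-multiply update used for
     vect_step_op_mul, on 8-lane int vectors.  */
  typedef int v8si_sketch __attribute__ ((vector_size (32)));
  typedef unsigned int v8usi_sketch __attribute__ ((vector_size (32)));

  v8si_sketch
  update_mul_iv_sketch (v8si_sketch iv, v8si_sketch vec_step)
  {
    /* Convert to the unsigned type, multiply (wrapping on overflow),
       and reinterpret back, mirroring vect_update_nonlinear_iv.  */
    v8usi_sketch u = __builtin_convertvector (iv, v8usi_sketch);
    v8usi_sketch s = __builtin_convertvector (vec_step, v8usi_sketch);
    return (v8si_sketch) (u * s);
  }
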
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index a2b0afb..5e75ed1 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -68,6 +68,15 @@ enum vect_def_type {
vect_unknown_def_type
};
+/* Define operation type of linear/non-linear induction variable. */
+enum vect_induction_op_type {
+ vect_step_op_add = 0,
+ vect_step_op_neg,
+ vect_step_op_mul,
+ vect_step_op_shl,
+ vect_step_op_shr
+};
+
/* Define type of reduction. */
enum vect_reduction_type {
TREE_CODE_REDUCTION,
@@ -1190,6 +1199,7 @@ public:
the version here. */
tree loop_phi_evolution_base_unchanged;
tree loop_phi_evolution_part;
+ enum vect_induction_op_type loop_phi_evolution_type;
/* Used for various bookkeeping purposes, generally holding a pointer to
some other stmt S that is in some way "related" to this stmt.
@@ -1423,6 +1433,7 @@ struct gather_scatter_info {
((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
+#define STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE(S) (S)->loop_phi_evolution_type
#define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
#define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
@@ -2329,6 +2340,10 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *);
extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
+/* Nonlinear induction. */
+extern tree vect_peel_nonlinear_iv_init (gimple_seq*, tree, tree,
+ tree, enum vect_induction_op_type);
+
/* In tree-vect-slp.cc. */
extern void vect_slp_init (void);
extern void vect_slp_fini (void);