author     Richard Biener <rguenther@suse.de>   2020-11-02 12:38:04 +0100
committer  Richard Biener <rguenther@suse.de>   2020-11-02 15:58:14 +0100
commit     e881774d0dda6d5127eb8f0642f6edc16dc0b1e7 (patch)
tree       246429551c79f231c1b41f6cf035e478fdc00452
parent     86deadf8d3ac55b3cd07e15d4e83e3b6ccd9ee81 (diff)
Rewrite SLP induction vectorization
This rewrites SLP induction vectorization to handle different
inductions in the different SLP lanes.  It also changes SLP build
to represent the initial value (but not the cycle) so it can be
enhanced to handle outer loop vectorization later.

Note this FAILs gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c
because it removes one CSE optimization that no longer works with a
non-uniform initial value and step.  I'll see to recover from this
after outer loop vectorization of inductions works.

It might be a bit friendlier to variable-size vectors now, but we're
now building the step vector from scalars ...

2020-11-02  Richard Biener  <rguenther@suse.de>

	* tree.h (build_real_from_wide): Declare.
	* tree.c (build_real_from_wide): New function.
	* tree-vect-slp.c (vect_build_slp_tree_2): Remove restriction
	on induction vectorization, represent the initial value.
	* tree-vect-loop.c (vect_model_induction_cost): Inline ...
	(vectorizable_induction): ... here.  Rewrite SLP code generation.

	* gcc.dg/vect/slp-49.c: New testcase.
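As a worked illustration (not part of the patch itself), the loop in the
new testcase gcc.dg/vect/slp-49.c has two different inductions, j with
step 1 and k with step 3, stored to interleaved lanes.  Assuming a V4SI
vector type, a group size of 2 and a single vector copy, the new code
generation would build roughly:

    vec_step = { 1, 3, 1, 3 }         /* per-lane steps                     */
    step_mul = { 0, 0, 1, 1 }         /* lane index / group_size            */
    vec_init = { 5, 17, 5, 17 }       /* vectorized initial values          */
               + vec_step * step_mul  /* adjust lanes beyond the group size */
             = { 5, 17, 6, 20 }
    latch:  iv += vec_step * lup_mul  /* lup_mul = 2, i.e. { 2, 6, 2, 6 }   */

so the first vector iteration stores { 5, 17, 6, 20 } and each further
iteration adds { 2, 6, 2, 6 }, matching the values the testcase checks.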
-rw-r--r--  gcc/testsuite/gcc.dg/vect/slp-49.c |  38
-rw-r--r--  gcc/tree-vect-loop.c               | 296
-rw-r--r--  gcc/tree-vect-slp.c                |  18
-rw-r--r--  gcc/tree.c                         |  16
-rw-r--r--  gcc/tree.h                         |   1
5 files changed, 222 insertions, 147 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/slp-49.c b/gcc/testsuite/gcc.dg/vect/slp-49.c
new file mode 100644
index 0000000..3f53baf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-49.c
@@ -0,0 +1,38 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int a[1024];
+
+void __attribute__((noipa))
+foo(int k)
+{
+ int j = 5;
+ for (int i = 0; i < 512; ++i)
+ {
+ a[2*i] = j;
+ a[2*i+1] = k;
+ j++;
+ k+=3;
+ }
+}
+
+int
+main()
+{
+ check_vect ();
+
+ foo (17);
+
+ for (int i = 0; i < 512; ++i)
+ {
+ if (a[2*i] != 5 + i
+ || a[2*i+1] != 17 + 3 * i)
+ __builtin_abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 105ea61..fcea289 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4443,34 +4443,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo,
}
-/* Function vect_model_induction_cost.
-
- Models cost for induction operations. */
-
-static void
-vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
- stmt_vector_for_cost *cost_vec)
-{
- unsigned inside_cost, prologue_cost;
-
- if (PURE_SLP_STMT (stmt_info))
- return;
-
- /* loop cost for vec_loop. */
- inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
-
- /* prologue cost for vec_init and vec_step. */
- prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
- stmt_info, 0, vect_prologue);
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vect_model_induction_cost: inside_cost = %d, "
- "prologue_cost = %d .\n", inside_cost, prologue_cost);
-}
-
-
/* Function get_initial_def_for_reduction
@@ -7796,7 +7768,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (slp_node && !nunits.is_constant ())
{
- /* The current SLP code creates the initial value element-by-element. */
+ /* The current SLP code creates the step value element-by-element. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP induction not supported for variable-length"
@@ -7806,9 +7778,46 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (!vec_stmt) /* transformation not required. */
{
+ unsigned inside_cost = 0, prologue_cost = 0;
+ if (slp_node)
+ {
+ /* We eventually need to set a vector type on invariant
+ arguments. */
+ unsigned j;
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+ if (!vect_maybe_update_slp_op_vectype
+ (child, SLP_TREE_VECTYPE (slp_node)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "incompatible vector types for "
+ "invariants\n");
+ return false;
+ }
+ /* loop cost for vec_loop. */
+ inside_cost
+ = record_stmt_cost (cost_vec,
+ SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
+ vector_stmt, stmt_info, 0, vect_body);
+ }
+ else /* if (!slp_node) */
+ {
+ /* loop cost for vec_loop. */
+ inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ /* prologue cost for vec_init and vec_step. */
+ prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
+ stmt_info, 0, vect_prologue);
+ }
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_model_induction_cost: inside_cost = %d, "
+ "prologue_cost = %d .\n", inside_cost,
+ prologue_cost);
+
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_induction");
- vect_model_induction_cost (stmt_info, ncopies, cost_vec);
return true;
}
@@ -7827,98 +7836,66 @@ vectorizable_induction (loop_vec_info loop_vinfo,
tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype);
pe = loop_preheader_edge (iv_loop);
- init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
- loop_preheader_edge (iv_loop));
-
- stmts = NULL;
- if (!nested_in_vect_loop)
- {
- /* Convert the initial value to the IV update type. */
- tree new_type = TREE_TYPE (step_expr);
- init_expr = gimple_convert (&stmts, new_type, init_expr);
-
- /* If we are using the loop mask to "peel" for alignment then we need
- to adjust the start value here. */
- tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
- if (skip_niters != NULL_TREE)
- {
- if (FLOAT_TYPE_P (vectype))
- skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
- skip_niters);
- else
- skip_niters = gimple_convert (&stmts, new_type, skip_niters);
- tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
- skip_niters, step_expr);
- init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
- init_expr, skip_step);
- }
- }
-
- if (stmts)
- {
- new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
- gcc_assert (!new_bb);
- }
-
/* Find the first insertion point in the BB. */
basic_block bb = gimple_bb (phi);
si = gsi_after_labels (bb);
/* For SLP induction we have to generate several IVs as for example
- with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
- [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform
- [VF*S, VF*S, VF*S, VF*S] for all. */
+ with group size 3 we need
+ [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1]
+ [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
if (slp_node)
{
/* Enforced above. */
unsigned int const_nunits = nunits.to_constant ();
- /* Generate [VF*S, VF*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vf);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
- }
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
+ /* The initial values are vectorized, but any lanes > group_size
+ need adjustment. */
+ slp_tree init_node
+ = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx];
- /* Now generate the IVs. */
+ /* Gather steps. Since we do not vectorize inductions as
+ cycles we have to reconstruct the step from SCEV data. */
unsigned group_size = SLP_TREE_LANES (slp_node);
+ tree *steps = XALLOCAVEC (tree, group_size);
+ stmt_vec_info phi_info;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+
+ /* Now generate the IVs. */
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- unsigned elts = const_nunits * nvects;
- /* Compute the number of distinct IVs we need. First reduce
- group_size if it is a multiple of const_nunits so we get
- one IV for a group_size of 4 but const_nunits 2. */
- unsigned group_sizep = group_size;
- if (group_sizep % const_nunits == 0)
- group_sizep = group_sizep / const_nunits;
- unsigned nivs = least_common_multiple (group_sizep,
+ gcc_assert ((const_nunits * nvects) % group_size == 0);
+ unsigned nivs = least_common_multiple (group_size,
const_nunits) / const_nunits;
- gcc_assert (elts % group_size == 0);
- tree elt = init_expr;
+ unsigned lup_mul = (nvects * const_nunits) / group_size;
+ tree stept = TREE_TYPE (step_vectype);
+ tree lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept, lup_mul,
+ UNSIGNED)
+ : build_int_cstu (stept, lup_mul));
unsigned ivn;
+ auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
tree_vector_builder elts (step_vectype, const_nunits, 1);
- stmts = NULL;
+ tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
- if (ivn*const_nunits + eltn >= group_size
- && (ivn * const_nunits + eltn) % group_size == 0)
- elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
- elt, step_expr);
+ tree elt = steps[(ivn*const_nunits + eltn) % group_size];
elts.quick_push (elt);
+ unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
+ mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ mul_elt, UNSIGNED)
+ : build_int_cstu (stept, mul_elt));
}
- vec_init = gimple_build_vector (&stmts, &elts);
- vec_init = gimple_convert (&stmts, vectype, vec_init);
+ stmts = NULL;
+ vec_step = gimple_build_vector (&stmts, &elts);
+ vec_step = gimple_convert (&stmts, step_vectype, vec_step);
+ vec_steps.safe_push (vec_step);
+ tree step_mul = gimple_build_vector (&stmts, &mul_elts);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
@@ -7926,65 +7903,81 @@ vectorizable_induction (loop_vec_info loop_vinfo,
}
/* Create the induction-phi that defines the induction-operand. */
- vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
+ vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
+ "vec_iv_");
induction_phi = create_phi_node (vec_dest, iv_loop->header);
induc_def = PHI_RESULT (induction_phi);
/* Create the iv update inside the loop */
- gimple_seq stmts = NULL;
+ stmts = NULL;
+ tree up = gimple_build (&stmts, MULT_EXPR, step_vectype,
+ vec_step, lupdate_mul);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ stmts = NULL;
vec_def = gimple_convert (&stmts, step_vectype, induc_def);
vec_def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, vec_def, vec_step);
+ PLUS_EXPR, step_vectype, vec_def, up);
vec_def = gimple_convert (&stmts, vectype, vec_def);
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
+ add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
+ UNKNOWN_LOCATION);
+
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
+ if (!integer_zerop (step_mul))
+ {
+ stmts = NULL;
+ vec_def = gimple_convert (&stmts, step_vectype, vec_init);
+ up = gimple_build (&stmts, MULT_EXPR, step_vectype,
+ vec_step, step_mul);
+ vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ vec_def, up);
+ vec_init = gimple_convert (&stmts, vectype, vec_def);
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
/* Set the arguments of the phi node: */
add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
- add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
- UNKNOWN_LOCATION);
SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi);
}
- /* Fill up to the number of vectors we need for the whole group. */
- nivs = least_common_multiple (group_size,
- const_nunits) / const_nunits;
- for (; ivn < nivs; ++ivn)
- SLP_TREE_VEC_STMTS (slp_node)
- .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]);
- /* Re-use IVs when we can. */
+ /* Re-use IVs when we can. We are generating further vector
+ stmts by adding VF' * stride to the IVs generated above. */
if (ivn < nvects)
{
unsigned vfp
= least_common_multiple (group_size, const_nunits) / group_size;
- /* Generate [VF'*S, VF'*S, ... ]. */
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
- {
- expr = build_int_cst (integer_type_node, vfp);
- expr = fold_convert (TREE_TYPE (step_expr), expr);
- }
- else
- expr = build_int_cst (TREE_TYPE (step_expr), vfp);
- new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
- expr, step_expr);
- if (! CONSTANT_CLASS_P (new_name))
- new_name = vect_init_vector (loop_vinfo, stmt_info, new_name,
- TREE_TYPE (step_expr), NULL);
- new_vec = build_vector_from_val (step_vectype, new_name);
- vec_step = vect_init_vector (loop_vinfo, stmt_info, new_vec,
- step_vectype, NULL);
+ tree lupdate_mul
+ = build_vector_from_val (step_vectype,
+ SCALAR_FLOAT_TYPE_P (stept)
+ ? build_real_from_wide (stept,
+ vfp, UNSIGNED)
+ : build_int_cstu (stept, vfp));
for (; ivn < nvects; ++ivn)
{
gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs];
- tree def;
- if (gimple_code (iv) == GIMPLE_PHI)
- def = gimple_phi_result (iv);
- else
- def = gimple_assign_lhs (iv);
+ tree def = gimple_get_lhs (iv);
gimple_seq stmts = NULL;
+ if (ivn < 2*nivs)
+ {
+ vec_steps[ivn - nivs]
+ = gimple_build (&stmts, MULT_EXPR, step_vectype,
+ vec_steps[ivn - nivs], lupdate_mul);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ }
+ stmts = NULL;
def = gimple_convert (&stmts, step_vectype, def);
- def = gimple_build (&stmts,
- PLUS_EXPR, step_vectype, def, vec_step);
+ def = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ def, vec_steps[ivn % nivs]);
def = gimple_convert (&stmts, vectype, def);
if (gimple_code (iv) == GIMPLE_PHI)
gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
@@ -8001,6 +7994,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
return true;
}
+ init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
+ loop_preheader_edge (iv_loop));
+
+ stmts = NULL;
+ if (!nested_in_vect_loop)
+ {
+ /* Convert the initial value to the IV update type. */
+ tree new_type = TREE_TYPE (step_expr);
+ init_expr = gimple_convert (&stmts, new_type, init_expr);
+
+ /* If we are using the loop mask to "peel" for alignment then we need
+ to adjust the start value here. */
+ tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ if (skip_niters != NULL_TREE)
+ {
+ if (FLOAT_TYPE_P (vectype))
+ skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
+ skip_niters);
+ else
+ skip_niters = gimple_convert (&stmts, new_type, skip_niters);
+ tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
+ skip_niters, step_expr);
+ init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
+ init_expr, skip_step);
+ }
+ }
+
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+
/* Create the vector that holds the initial_value of the induction. */
if (nested_in_vect_loop)
{
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 45e33c0..63a59c0 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1441,20 +1441,14 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
return NULL;
vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
- /* Induction from different IVs is not supported. */
if (def_type == vect_induction_def)
{
- stmt_vec_info other_info;
- FOR_EACH_VEC_ELT (stmts, i, other_info)
- if (stmt_info != other_info)
- return NULL;
-
- /* Induction PHIs are leafs. */
- (*tree_size)++;
- node = vect_create_new_slp_node (node, stmts, nops);
- SLP_TREE_VECTYPE (node) = vectype;
- SLP_TREE_CHILDREN (node).quick_grow_cleared (nops);
- return node;
+ /* Induction PHIs are not cycles but walk the initial
+ value. */
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ if (nested_in_vect_loop_p (loop, stmt_info))
+ loop = loop->inner;
+ skip_args[loop_latch_edge (loop)->dest_idx] = true;
}
else if (def_type == vect_reduction_def
|| def_type == vect_double_reduction_def
diff --git a/gcc/tree.c b/gcc/tree.c
index 81f867d..739c288 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -2250,6 +2250,22 @@ build_real_from_int_cst (tree type, const_tree i)
return v;
}
+/* Return a new REAL_CST node whose type is TYPE
+ and whose value is the integer value I which has sign SGN. */
+
+tree
+build_real_from_wide (tree type, const wide_int_ref &i, signop sgn)
+{
+ REAL_VALUE_TYPE d;
+
+ /* Clear all bits of the real value type so that we can later do
+ bitwise comparisons to see if two values are the same. */
+ memset (&d, 0, sizeof d);
+
+ real_from_integer (&d, TYPE_MODE (type), i, sgn);
+ return build_real (type, d);
+}
+
/* Return a newly constructed STRING_CST node whose value is the LEN
characters at STR when STR is nonnull, or all zeros otherwise.
Note that for a C string literal, LEN should include the trailing NUL.
diff --git a/gcc/tree.h b/gcc/tree.h
index 7f0aa5b..04e564c 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4429,6 +4429,7 @@ extern tree build_constructor_from_vec (tree, const vec<tree, va_gc> *);
extern tree build_constructor_va (tree, int, ...);
extern tree build_clobber (tree);
extern tree build_real_from_int_cst (tree, const_tree);
+extern tree build_real_from_wide (tree, const wide_int_ref &, signop);
extern tree build_complex (tree, tree, tree);
extern tree build_complex_inf (tree, bool);
extern tree build_each_one_cst (tree);
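
The new build_real_from_wide helper converts an integer value to a
REAL_CST of the given type via real_from_integer.  As a hedged sketch
(reusing the names from the vectorizable_induction hunk above), the SLP
induction code uses it to build the per-copy step multiplier when the IV
type is a floating-point type:

    /* Sketch only: splat the per-copy multiplier lup_mul as a vector
       constant, as a REAL_CST for float IVs and an INTEGER_CST otherwise.  */
    tree mul = SCALAR_FLOAT_TYPE_P (stept)
	       ? build_real_from_wide (stept, lup_mul, UNSIGNED)
	       : build_int_cstu (stept, lup_mul);
    tree lupdate_mul = build_vector_from_val (step_vectype, mul);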