aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-slp.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2019-11-16 10:29:31 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2019-11-16 10:29:31 +0000
commit9b75f56d4b7951c60a656396dddd4a65787b95bc (patch)
tree5908e1c542d7e66687f8d2141198f3323e3a51eb /gcc/tree-vect-slp.c
parent23ff8c05804c2d3fc6e9179d2e9c2940ae53d592 (diff)
downloadgcc-9b75f56d4b7951c60a656396dddd4a65787b95bc.zip
gcc-9b75f56d4b7951c60a656396dddd4a65787b95bc.tar.gz
gcc-9b75f56d4b7951c60a656396dddd4a65787b95bc.tar.bz2
Apply maximum nunits for BB SLP
The BB vectoriser picked vector types in the same way as the loop vectoriser: it picked a vector mode/size for the region and then based all the vector types off that choice. This meant we could end up trying to use vector types that had too many elements for the group size. The main part of this patch is therefore about passing the SLP group size down to routines like get_vectype_for_scalar_type and ensuring that each vector type in the SLP tree is chosen wrt the group size. That part in itself is pretty easy and mechanical. The main warts are: (1) We normally pick a STMT_VINFO_VECTYPE for data references at an early stage (vect_analyze_data_refs). However, nothing in the BB vectoriser relied on this, or on the min_vf calculated from it. I couldn't see anything other than vect_recog_bool_pattern that tried to access the vector type before the SLP tree is built. (2) It's possible for the same statement to be used in groups of different sizes. Taking the group size into account meant that we could try to pick different vector types for the same statement. This problem should go away with the move to doing everything on SLP trees, where presumably we would attach the vector type to the SLP node rather than the stmt_vec_info. Until then, the patch just uses a first-come, first-served approach. (3) A similar problem exists for grouped data references, where different statements in the same dataref group could be used in SLP nodes that have different group sizes. The patch copes with that by making sure that all vector types in a dataref group remain consistent. 
The patch means that: void f (int *x, short *y) { x[0] += y[0]; x[1] += y[1]; x[2] += y[2]; x[3] += y[3]; } now produces: ldr q0, [x0] ldr d1, [x1] saddw v0.4s, v0.4s, v1.4h str q0, [x0] ret instead of: ldrsh w2, [x1] ldrsh w3, [x1, 2] fmov s0, w2 ldrsh w2, [x1, 4] ldrsh w1, [x1, 6] ins v0.s[1], w3 ldr q1, [x0] ins v0.s[2], w2 ins v0.s[3], w1 add v0.4s, v0.4s, v1.4s str q0, [x0] ret Unfortunately it also means we start to vectorise gcc.target/i386/pr84101.c for -m32. That seems like a target cost issue though; see PR92265 for details. 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> gcc/ * tree-vectorizer.h (vect_get_vector_types_for_stmt): Take an optional maximum nunits. (get_vectype_for_scalar_type): Likewise. Also declare a form that takes an slp_tree. (get_mask_type_for_scalar_type): Take an optional slp_tree. (vect_get_mask_type_for_stmt): Likewise. * tree-vect-data-refs.c (vect_analyze_data_refs): Don't store the vector type in STMT_VINFO_VECTYPE for BB vectorization. * tree-vect-patterns.c (vect_recog_bool_pattern): Use vect_get_vector_types_for_stmt instead of STMT_VINFO_VECTYPE to get an assumed vector type for data references. * tree-vect-slp.c (vect_update_shared_vectype): New function. (vect_update_all_shared_vectypes): Likewise. (vect_build_slp_tree_1): Pass the group size to vect_get_vector_types_for_stmt. Use vect_update_shared_vectype for BB vectorization. (vect_build_slp_tree_2): Call vect_update_all_shared_vectypes before building the vector from scalars. (vect_analyze_slp_instance): Pass the group size to get_vectype_for_scalar_type. (vect_slp_analyze_node_operations_1): Don't recompute the vector types for BB vectorization here; just handle the case in which we deferred the choice for booleans. (vect_get_constant_vectors): Pass the slp_tree to get_vectype_for_scalar_type. * tree-vect-stmts.c (vect_prologue_cost_for_slp_op): Likewise. (vectorizable_call): Likewise. (vectorizable_simd_clone_call): Likewise. 
(vectorizable_conversion): Likewise. (vectorizable_shift): Likewise. (vectorizable_operation): Likewise. (vectorizable_comparison): Likewise. (vect_is_simple_cond): Take the slp_tree as argument and pass it to get_vectype_for_scalar_type. (vectorizable_condition): Update call accordingly. (get_vectype_for_scalar_type): Take a group_size argument. For BB vectorization, limit the vector to that number of elements. Also define an overload that takes an slp_tree. (get_mask_type_for_scalar_type): Add an slp_tree argument and pass it to get_vectype_for_scalar_type. (vect_get_vector_types_for_stmt): Add a group_size argument and pass it to get_vectype_for_scalar_type. Don't use the cached vector type for BB vectorization if a group size is given. Handle data references in that case. (vect_get_mask_type_for_stmt): Take an slp_tree argument and pass it to get_mask_type_for_scalar_type. gcc/testsuite/ * gcc.dg/vect/bb-slp-4.c: Expect the block to be vectorized with -fno-vect-cost-model. * gcc.dg/vect/bb-slp-bool-1.c: New test. * gcc.target/aarch64/vect_mixed_sizes_14.c: Likewise. * gcc.target/i386/pr84101.c: XFAIL for -m32. From-SVN: r278334
Diffstat (limited to 'gcc/tree-vect-slp.c')
-rw-r--r--gcc/tree-vect-slp.c113
1 files changed, 92 insertions, 21 deletions
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 1c1e502..e5a7078 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -607,6 +607,77 @@ again:
return 0;
}
+/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
+ Return true if we can, meaning that this choice doesn't conflict with
+ existing SLP nodes that use STMT_INFO.
+
+ The assignment succeeds immediately if STMT_INFO already has a vector
+ type for which useless_type_conversion_p (VECTYPE, old type) holds.
+ Otherwise the type may only be changed if no statement whose type
+ would change has SLP uses or is a pattern statement: for a grouped
+ read that means every member of the group, for anything else it means
+ STMT_INFO itself.
+
+ On failure no STMT_VINFO_VECTYPE is modified and, when dumping is
+ enabled, the old and new vector types are reported. */
+
+static bool
+vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
+{
+ tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
+ return true;
+
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+ {
+ /* We maintain the invariant that if any statement in the group is
+ used, all other members of the group have the same vector type.
+ So first look for a member that already has SLP uses or that is a
+ pattern statement; only if there is none (MEMBER_INFO ends up
+ null) is the whole group switched to VECTYPE. If such a member
+ exists we fall through to the failure path below. */
+ stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ stmt_vec_info member_info = first_info;
+ for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
+ if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
+ || is_pattern_stmt_p (member_info))
+ break;
+
+ if (!member_info)
+ {
+ for (member_info = first_info; member_info;
+ member_info = DR_GROUP_NEXT_ELEMENT (member_info))
+ STMT_VINFO_VECTYPE (member_info) = vectype;
+ return true;
+ }
+ }
+ else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
+ && !is_pattern_stmt_p (stmt_info))
+ {
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
+ return true;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: incompatible vector"
+ " types for: %G", stmt_info->stmt);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " old vector type: %T\n", old_vectype);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " new vector type: %T\n", vectype);
+ }
+ return false;
+}
+
+/* Try to infer and assign a vector type to all the statements in STMTS.
+ Used only for BB vectorization.
+
+ The vector type is obtained from vect_get_vector_types_for_stmt for
+ the first statement in STMTS, passing the number of statements as the
+ group size, and is then offered to every statement in turn through
+ vect_update_shared_vectype.
+
+ Return true if every statement accepted the type. Return false if no
+ vector type could be found or as soon as one statement rejects it;
+ statements updated before the rejection are not rolled back here. */
+
+static bool
+vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
+{
+ tree vectype, nunits_vectype;
+ if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
+ &nunits_vectype, stmts.length ()))
+ return false;
+
+ stmt_vec_info stmt_info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (stmts, i, stmt_info)
+ if (!vect_update_shared_vectype (stmt_info, vectype))
+ return false;
+
+ return true;
+}
+
/* Return true if call statements CALL1 and CALL2 are similar enough
to be combined into the same SLP group. */
@@ -753,6 +824,7 @@ vect_build_slp_tree_1 (unsigned char *swap,
stmt_vec_info stmt_info;
FOR_EACH_VEC_ELT (stmts, i, stmt_info)
{
+ vec_info *vinfo = stmt_info->vinfo;
gimple *stmt = stmt_info->stmt;
swap[i] = 0;
matches[i] = false;
@@ -786,7 +858,7 @@ vect_build_slp_tree_1 (unsigned char *swap,
tree nunits_vectype;
if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
- &nunits_vectype)
+ &nunits_vectype, group_size)
|| (nunits_vectype
&& !vect_record_max_nunits (stmt_info, group_size,
nunits_vectype, max_nunits)))
@@ -798,6 +870,10 @@ vect_build_slp_tree_1 (unsigned char *swap,
gcc_assert (vectype);
+ if (is_a <bb_vec_info> (vinfo)
+ && !vect_update_shared_vectype (stmt_info, vectype))
+ continue;
+
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{
rhs_code = CALL_EXPR;
@@ -1336,7 +1412,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
break;
- if (!grandchild)
+ if (!grandchild
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
{
/* Roll back. */
this_tree_size = old_tree_size;
@@ -1377,7 +1454,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
do extra work to cancel the pattern so the uses see the
scalar version. */
&& !is_pattern_stmt_p (stmt_info)
- && !oprnd_info->any_pattern)
+ && !oprnd_info->any_pattern
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1474,7 +1552,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
break;
- if (!grandchild)
+ if (!grandchild
+ && (vect_update_all_shared_vectypes
+ (oprnd_info->def_stmts)))
{
/* Roll back. */
this_tree_size = old_tree_size;
@@ -2010,8 +2090,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
scalar_type = TREE_TYPE (DR_REF (dr));
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
group_size = DR_GROUP_SIZE (stmt_info);
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
{
@@ -2650,22 +2730,13 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
Memory accesses already got their vector type assigned
in vect_analyze_data_refs. */
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- if (bb_vinfo
- && ! STMT_VINFO_DATA_REF (stmt_info))
+ if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node)
{
- tree vectype, nunits_vectype;
- if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
- &nunits_vectype))
- /* We checked this when building the node. */
- gcc_unreachable ();
- if (vectype == boolean_type_node)
- {
- vectype = vect_get_mask_type_for_stmt (stmt_info);
- if (!vectype)
- /* vect_get_mask_type_for_stmt has already explained the
- failure. */
- return false;
- }
+ tree vectype = vect_get_mask_type_for_stmt (stmt_info, node);
+ if (!vectype)
+ /* vect_get_mask_type_for_stmt has already explained the
+ failure. */
+ return false;
stmt_vec_info sstmt_info;
unsigned int i;
@@ -3594,7 +3665,7 @@ vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
&& vect_mask_constant_operand_p (stmt_vinfo))
vector_type = truth_type_for (stmt_vectype);
else
- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
+ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
/* ??? For lane-reducing ops we should also have the required number
of vector stmts initialized rather than second-guessing here. */