aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop-manip.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-09-22 16:44:29 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2017-09-22 16:44:29 +0000
commitf702e7d43f2aec71640d0db7ecf1543ba75f37c3 (patch)
tree9beee33fae8a909f405a3f3bb2328cab22dde95c /gcc/tree-vect-loop-manip.c
parentb2b67217d3c0901f7c414b7f4b4a92e92678846e (diff)
downloadgcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.zip
gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.gz
gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.bz2
Let the target choose a vectorisation alignment
The vectoriser aligned vectors to TYPE_ALIGN unconditionally, although there was also a hard-coded assumption that this was equal to the type size. This was inconvenient for SVE for two reasons: - When compiling for a specific power-of-2 SVE vector length, we might want to align to a full vector. However, the TYPE_ALIGN is governed by the ABI alignment, which is 128 bits regardless of size. - For vector-length-agnostic code it doesn't usually make sense to align, since the runtime vector length might not be a power of two. Even for power of two sizes, there's no guarantee that aligning to the previous 16 bytes will be an improvement. This patch therefore adds a target hook to control the preferred vectoriser (as opposed to ABI) alignment. 2017-09-22 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * target.def (preferred_vector_alignment): New hook. * doc/tm.texi.in (TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New hook. * doc/tm.texi: Regenerate. * targhooks.h (default_preferred_vector_alignment): Declare. * targhooks.c (default_preferred_vector_alignment): New function. * tree-vectorizer.h (dataref_aux): Add a target_alignment field. Expand commentary. (DR_TARGET_ALIGNMENT): New macro. (aligned_access_p): Update commentary. (vect_known_alignment_in_bytes): New function. * tree-vect-data-refs.c (vect_calculate_required_alignment): New function. (vect_compute_data_ref_alignment): Set DR_TARGET_ALIGNMENT. Calculate the misalignment based on the target alignment rather than the vector size. (vect_update_misalignment_for_peel): Use DR_TARGET_ALIGNMENT rather than TYPE_ALIGN / BITS_PER_UNIT to update the misalignment. (vect_enhance_data_refs_alignment): Mask the byte misalignment with the target alignment, rather than masking the element misalignment with the number of elements in a vector. Also use the target alignment when calculating the maximum number of peels. 
(vect_find_same_alignment_drs): Use vect_calculate_required_alignment instead of TYPE_ALIGN_UNIT. (vect_duplicate_ssa_name_ptr_info): Remove stmt_info parameter. Measure DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT. (vect_create_addr_base_for_vector_ref): Update call accordingly. (vect_create_data_ref_ptr): Likewise. (vect_setup_realignment): Realign by ANDing with -DR_TARGET_ALIGNMENT. * tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Calculate the number of peels based on DR_TARGET_ALIGNMENT. * tree-vect-stmts.c (get_group_load_store_type): Compare the gap with the guaranteed alignment boundary when deciding whether overrun is OK. (vectorizable_mask_load_store): Interpret DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT instead of TYPE_ALIGN_UNIT. (ensure_base_align): Remove stmt_info parameter. Get the target base alignment from DR_TARGET_ALIGNMENT. (vectorizable_store): Update call accordingly. Interpret DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT instead of TYPE_ALIGN_UNIT. (vectorizable_load): Likewise. gcc/testsuite/ * gcc.dg/vect/vect-outer-3a.c: Adjust dump scan for new wording of alignment message. * gcc.dg/vect/vect-outer-3a-big-array.c: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r253101
Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
-rw-r--r--gcc/tree-vect-loop-manip.c51
1 files changed, 27 insertions, 24 deletions
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index f78e4b4..5787d53 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -956,8 +956,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
gimple *dr_stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
@@ -978,32 +977,36 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
&stmts, offset);
tree type = unsigned_type_for (TREE_TYPE (start_addr));
- tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
- HOST_WIDE_INT elem_size =
- int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+ tree target_align_minus_1 = build_int_cst (type, target_align - 1);
+ HOST_WIDE_INT elem_size
+ = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
- tree nelements_minus_1 = build_int_cst (type, nelements - 1);
- tree nelements_tree = build_int_cst (type, nelements);
- tree byte_misalign;
- tree elem_misalign;
-
- /* Create: byte_misalign = addr & (vectype_align - 1) */
- byte_misalign =
- fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
- vectype_align_minus_1);
-
- /* Create: elem_misalign = byte_misalign / element_size */
- elem_misalign =
- fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
-
- /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
+ HOST_WIDE_INT align_in_elems = target_align / elem_size;
+ tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
+ tree align_in_elems_tree = build_int_cst (type, align_in_elems);
+ tree misalign_in_bytes;
+ tree misalign_in_elems;
+
+ /* Create: misalign_in_bytes = addr & (target_align - 1). */
+ misalign_in_bytes
+ = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
+ target_align_minus_1);
+
+ /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
+ misalign_in_elems
+ = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);
+
+ /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
+ & (align_in_elems - 1)). */
if (negative)
- iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
+ iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
+ align_in_elems_tree);
else
- iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
- iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
+ iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
+ misalign_in_elems);
+ iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
iters = fold_convert (niters_type, iters);
- *bound = nelements - 1;
+ *bound = align_in_elems - 1;
}
if (dump_enabled_p ())