diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2017-09-22 16:44:29 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2017-09-22 16:44:29 +0000 |
commit | f702e7d43f2aec71640d0db7ecf1543ba75f37c3 (patch) | |
tree | 9beee33fae8a909f405a3f3bb2328cab22dde95c /gcc/tree-vect-data-refs.c | |
parent | b2b67217d3c0901f7c414b7f4b4a92e92678846e (diff) | |
download | gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.zip gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.gz gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.bz2 |
Let the target choose a vectorisation alignment
The vectoriser aligned vectors to TYPE_ALIGN unconditionally, although
there was also a hard-coded assumption that this was equal to the type
size. This was inconvenient for SVE for two reasons:
- When compiling for a specific power-of-2 SVE vector length, we might
want to align to a full vector. However, the TYPE_ALIGN is governed
by the ABI alignment, which is 128 bits regardless of size.
- For vector-length-agnostic code it doesn't usually make sense to align,
since the runtime vector length might not be a power of two. Even for
power of two sizes, there's no guarantee that aligning to the previous
16 bytes will be an improvement.
This patch therefore adds a target hook to control the preferred
vectoriser (as opposed to ABI) alignment.
2017-09-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* target.def (preferred_vector_alignment): New hook.
* doc/tm.texi.in (TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New
hook.
* doc/tm.texi: Regenerate.
* targhooks.h (default_preferred_vector_alignment): Declare.
* targhooks.c (default_preferred_vector_alignment): New function.
* tree-vectorizer.h (dataref_aux): Add a target_alignment field.
Expand commentary.
(DR_TARGET_ALIGNMENT): New macro.
(aligned_access_p): Update commentary.
(vect_known_alignment_in_bytes): New function.
* tree-vect-data-refs.c (vect_calculate_target_alignment): New
function.
(vect_compute_data_ref_alignment): Set DR_TARGET_ALIGNMENT.
Calculate the misalignment based on the target alignment rather than
the vector size.
(vect_update_misalignment_for_peel): Use DR_TARGET_ALIGNMENT
rather than TYPE_ALIGN / BITS_PER_UNIT to update the misalignment.
(vect_enhance_data_refs_alignment): Mask the byte misalignment with
the target alignment, rather than masking the element misalignment
with the number of elements in a vector. Also use the target
alignment when calculating the maximum number of peels.
(vect_find_same_alignment_drs): Use vect_calculate_target_alignment
instead of TYPE_ALIGN_UNIT.
(vect_duplicate_ssa_name_ptr_info): Remove stmt_info parameter.
Measure DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT.
(vect_create_addr_base_for_vector_ref): Update call accordingly.
(vect_create_data_ref_ptr): Likewise.
(vect_setup_realignment): Realign by ANDing with
-DR_TARGET_ALIGNMENT.
* tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Calculate
the number of peels based on DR_TARGET_ALIGNMENT.
* tree-vect-stmts.c (get_group_load_store_type): Compare the gap
with the guaranteed alignment boundary when deciding whether
overrun is OK.
(vectorizable_mask_load_store): Interpret DR_MISALIGNMENT
relative to DR_TARGET_ALIGNMENT instead of TYPE_ALIGN_UNIT.
(ensure_base_align): Remove stmt_info parameter. Get the
target base alignment from DR_TARGET_ALIGNMENT.
(vectorizable_store): Update call accordingly. Interpret
DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT instead of
TYPE_ALIGN_UNIT.
(vectorizable_load): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-outer-3a.c: Adjust dump scan for new wording
of alignment message.
* gcc.dg/vect/vect-outer-3a-big-array.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r253101
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r-- | gcc/tree-vect-data-refs.c | 92 |
1 files changed, 52 insertions, 40 deletions
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 175052a..3c57e5c 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -775,6 +775,17 @@ vect_record_base_alignments (vec_info *vinfo) } } +/* Return the target alignment for the vectorized form of DR. */ + +static unsigned int +vect_calculate_target_alignment (struct data_reference *dr) +{ + gimple *stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + return targetm.vectorize.preferred_vector_alignment (vectype); +} + /* Function vect_compute_data_ref_alignment Compute the misalignment of the data reference DR. @@ -811,6 +822,10 @@ vect_compute_data_ref_alignment (struct data_reference *dr) innermost_loop_behavior *drb = vect_dr_behavior (dr); bool step_preserves_misalignment_p; + unsigned HOST_WIDE_INT vector_alignment + = vect_calculate_target_alignment (dr) / BITS_PER_UNIT; + DR_TARGET_ALIGNMENT (dr) = vector_alignment; + /* No step for BB vectorization. */ if (!loop) { @@ -823,43 +838,41 @@ vect_compute_data_ref_alignment (struct data_reference *dr) relative to the outer-loop (LOOP). This is ok only if the misalignment stays the same throughout the execution of the inner-loop, which is why we have to check that the stride of the dataref in the inner-loop evenly - divides by the vector size. */ + divides by the vector alignment. 
*/ else if (nested_in_vect_loop_p (loop, stmt)) { step_preserves_misalignment_p - = (DR_STEP_ALIGNMENT (dr) - % GET_MODE_SIZE (TYPE_MODE (vectype))) == 0; + = (DR_STEP_ALIGNMENT (dr) % vector_alignment) == 0; if (dump_enabled_p ()) { if (step_preserves_misalignment_p) dump_printf_loc (MSG_NOTE, vect_location, - "inner step divides the vector-size.\n"); + "inner step divides the vector alignment.\n"); else dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "inner step doesn't divide the vector-size.\n"); + "inner step doesn't divide the vector" + " alignment.\n"); } } /* Similarly we can only use base and misalignment information relative to an innermost loop if the misalignment stays the same throughout the execution of the loop. As above, this is the case if the stride of - the dataref evenly divides by the vector size. */ + the dataref evenly divides by the alignment. */ else { unsigned vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); step_preserves_misalignment_p - = ((DR_STEP_ALIGNMENT (dr) * vf) - % GET_MODE_SIZE (TYPE_MODE (vectype))) == 0; + = ((DR_STEP_ALIGNMENT (dr) * vf) % vector_alignment) == 0; if (!step_preserves_misalignment_p && dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "step doesn't divide the vector-size.\n"); + "step doesn't divide the vector alignment.\n"); } unsigned int base_alignment = drb->base_alignment; unsigned int base_misalignment = drb->base_misalignment; - unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype); /* Calculate the maximum of the pooled base address alignment and the alignment that we can compute for DR itself. */ @@ -1007,9 +1020,8 @@ vect_update_misalignment_for_peel (struct data_reference *dr, { bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; int misal = DR_MISALIGNMENT (dr); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); misal += negative ? 
-npeel * dr_size : npeel * dr_size; - misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1; + misal &= DR_TARGET_ALIGNMENT (dr) - 1; SET_DR_MISALIGNMENT (dr, misal); return; } @@ -1657,16 +1669,17 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { if (known_alignment_for_access_p (dr)) { - unsigned int npeel_tmp = 0; + unsigned int npeel_tmp = 0; bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0; - vectype = STMT_VINFO_VECTYPE (stmt_info); - nelements = TYPE_VECTOR_SUBPARTS (vectype); - mis = DR_MISALIGNMENT (dr) / vect_get_scalar_dr_size (dr); + vectype = STMT_VINFO_VECTYPE (stmt_info); + nelements = TYPE_VECTOR_SUBPARTS (vectype); + unsigned int target_align = DR_TARGET_ALIGNMENT (dr); + unsigned int dr_size = vect_get_scalar_dr_size (dr); + mis = (negative ? DR_MISALIGNMENT (dr) : -DR_MISALIGNMENT (dr)); if (DR_MISALIGNMENT (dr) != 0) - npeel_tmp = (negative ? (mis - nelements) - : (nelements - mis)) & (nelements - 1); + npeel_tmp = (mis & (target_align - 1)) / dr_size; /* For multiple types, it is possible that the bigger type access will have more than one peeling option. E.g., a loop with two @@ -1701,7 +1714,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { vect_peeling_hash_insert (&peeling_htab, loop_vinfo, dr, npeel_tmp); - npeel_tmp += nelements; + npeel_tmp += target_align / dr_size; } one_misalignment_known = true; @@ -1922,7 +1935,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) stmt = DR_STMT (dr0); stmt_info = vinfo_for_stmt (stmt); vectype = STMT_VINFO_VECTYPE (stmt_info); - nelements = TYPE_VECTOR_SUBPARTS (vectype); if (known_alignment_for_access_p (dr0)) { @@ -1935,9 +1947,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) updating DR_MISALIGNMENT values. The peeling factor is the vectorization factor minus the misalignment as an element count. */ - mis = DR_MISALIGNMENT (dr0) / vect_get_scalar_dr_size (dr0); - npeel = ((negative ? 
mis - nelements : nelements - mis) - & (nelements - 1)); + mis = negative ? DR_MISALIGNMENT (dr0) : -DR_MISALIGNMENT (dr0); + unsigned int target_align = DR_TARGET_ALIGNMENT (dr0); + npeel = ((mis & (target_align - 1)) + / vect_get_scalar_dr_size (dr0)); } /* For interleaved data access every iteration accesses all the @@ -1976,10 +1989,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) unsigned max_peel = npeel; if (max_peel == 0) { - gimple *dr_stmt = DR_STMT (dr0); - stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt); - tree vtype = STMT_VINFO_VECTYPE (vinfo); - max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1; + unsigned int target_align = DR_TARGET_ALIGNMENT (dr0); + max_peel = target_align / vect_get_scalar_dr_size (dr0) - 1; } if (max_peel > max_allowed_peel) { @@ -2201,8 +2212,10 @@ vect_find_same_alignment_drs (struct data_dependence_relation *ddr) if (diff != 0) { /* Get the wider of the two alignments. */ - unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_a)); - unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_b)); + unsigned int align_a = (vect_calculate_target_alignment (dra) + / BITS_PER_UNIT); + unsigned int align_b = (vect_calculate_target_alignment (drb) + / BITS_PER_UNIT); unsigned int max_align = MAX (align_a, align_b); /* Require the gap to be a multiple of the larger vector alignment. */ @@ -3995,16 +4008,15 @@ vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name) /* Duplicate ptr info and set alignment/misaligment on NAME from DR. 
*/ static void -vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr, - stmt_vec_info stmt_info) +vect_duplicate_ssa_name_ptr_info (tree name, data_reference *dr) { duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr)); - unsigned int align = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmt_info)); int misalign = DR_MISALIGNMENT (dr); if (misalign == DR_MISALIGNMENT_UNKNOWN) mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name)); else - set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), align, misalign); + set_ptr_info_alignment (SSA_NAME_PTR_INFO (name), + DR_TARGET_ALIGNMENT (dr), misalign); } /* Function vect_create_addr_base_for_vector_ref. @@ -4109,7 +4121,7 @@ vect_create_addr_base_for_vector_ref (gimple *stmt, && TREE_CODE (addr_base) == SSA_NAME && !SSA_NAME_PTR_INFO (addr_base)) { - vect_duplicate_ssa_name_ptr_info (addr_base, dr, stmt_info); + vect_duplicate_ssa_name_ptr_info (addr_base, dr); if (offset || byte_offset) mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base)); } @@ -4368,8 +4380,8 @@ vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop, /* Copy the points-to information if it exists. */ if (DR_PTR_INFO (dr)) { - vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info); - vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info); + vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr); + vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr); } if (ptr_incr) *ptr_incr = incr; @@ -4398,8 +4410,8 @@ vect_create_data_ref_ptr (gimple *stmt, tree aggr_type, struct loop *at_loop, /* Copy the points-to information if it exists. 
*/ if (DR_PTR_INFO (dr)) { - vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr, stmt_info); - vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr, stmt_info); + vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr); + vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr); } if (ptr_incr) *ptr_incr = incr; @@ -5003,10 +5015,10 @@ vect_setup_realignment (gimple *stmt, gimple_stmt_iterator *gsi, new_temp = copy_ssa_name (ptr); else new_temp = make_ssa_name (TREE_TYPE (ptr)); + unsigned int align = DR_TARGET_ALIGNMENT (dr); new_stmt = gimple_build_assign (new_temp, BIT_AND_EXPR, ptr, - build_int_cst (TREE_TYPE (ptr), - -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); + build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); gcc_assert (!new_bb); data_ref |