diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2017-09-22 16:44:29 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2017-09-22 16:44:29 +0000 |
commit | f702e7d43f2aec71640d0db7ecf1543ba75f37c3 (patch) | |
tree | 9beee33fae8a909f405a3f3bb2328cab22dde95c /gcc/tree-vect-stmts.c | |
parent | b2b67217d3c0901f7c414b7f4b4a92e92678846e (diff) | |
download | gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.zip gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.gz gcc-f702e7d43f2aec71640d0db7ecf1543ba75f37c3.tar.bz2 |
Let the target choose a vectorisation alignment
The vectoriser aligned vectors to TYPE_ALIGN unconditionally, although
there was also a hard-coded assumption that this was equal to the type
size. This was inconvenient for SVE for two reasons:
- When compiling for a specific power-of-2 SVE vector length, we might
want to align to a full vector. However, the TYPE_ALIGN is governed
by the ABI alignment, which is 128 bits regardless of size.
- For vector-length-agnostic code it doesn't usually make sense to align,
since the runtime vector length might not be a power of two. Even for
power of two sizes, there's no guarantee that aligning to the previous
16 bytes will be an improvement.
This patch therefore adds a target hook to control the preferred
vectoriser (as opposed to ABI) alignment.
2017-09-22 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* target.def (preferred_vector_alignment): New hook.
* doc/tm.texi.in (TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT): New
hook.
* doc/tm.texi: Regenerate.
* targhooks.h (default_preferred_vector_alignment): Declare.
* targhooks.c (default_preferred_vector_alignment): New function.
* tree-vectorizer.h (dataref_aux): Add a target_alignment field.
Expand commentary.
(DR_TARGET_ALIGNMENT): New macro.
(aligned_access_p): Update commentary.
(vect_known_alignment_in_bytes): New function.
* tree-vect-data-refs.c (vect_calculate_required_alignment): New
function.
(vect_compute_data_ref_alignment): Set DR_TARGET_ALIGNMENT.
Calculate the misalignment based on the target alignment rather than
the vector size.
(vect_update_misalignment_for_peel): Use DR_TARGET_ALIGNMENT
rather than TYPE_ALIGN / BITS_PER_UNIT to update the misalignment.
(vect_enhance_data_refs_alignment): Mask the byte misalignment with
the target alignment, rather than masking the element misalignment
with the number of elements in a vector. Also use the target
alignment when calculating the maximum number of peels.
(vect_find_same_alignment_drs): Use vect_calculate_required_alignment
instead of TYPE_ALIGN_UNIT.
(vect_duplicate_ssa_name_ptr_info): Remove stmt_info parameter.
Measure DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT.
(vect_create_addr_base_for_vector_ref): Update call accordingly.
(vect_create_data_ref_ptr): Likewise.
(vect_setup_realignment): Realign by ANDing with
-DR_TARGET_ALIGNMENT.
* tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Calculate
the number of peels based on DR_TARGET_ALIGNMENT.
* tree-vect-stmts.c (get_group_load_store_type): Compare the gap
with the guaranteed alignment boundary when deciding whether
overrun is OK.
(vectorizable_mask_load_store): Interpret DR_MISALIGNMENT
relative to DR_TARGET_ALIGNMENT instead of TYPE_ALIGN_UNIT.
(ensure_base_align): Remove stmt_info parameter. Get the
target base alignment from DR_TARGET_ALIGNMENT.
(vectorizable_store): Update call accordingly. Interpret
DR_MISALIGNMENT relative to DR_TARGET_ALIGNMENT instead of
TYPE_ALIGN_UNIT.
(vectorizable_load): Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-outer-3a.c: Adjust dump scan for new wording
of alignment message.
* gcc.dg/vect/vect-outer-3a-big-array.c: Likewise.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r253101
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- | gcc/tree-vect-stmts.c | 83 |
1 files changed, 45 insertions, 38 deletions
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 8f0d3d0..29b7333 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -1737,6 +1737,7 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp, loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info); + data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); bool single_element_p = (stmt == first_stmt && !GROUP_NEXT_ELEMENT (stmt_info)); @@ -1780,10 +1781,13 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp, " non-consecutive accesses\n"); return false; } - /* If the access is aligned an overrun is fine. */ + /* An overrun is fine if the trailing elements are smaller + than the alignment boundary B. Every vector access will + be a multiple of B and so we are guaranteed to access a + non-gap element in the same B-sized block. */ if (overrun_p - && aligned_access_p - (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))) + && gap < (vect_known_alignment_in_bytes (first_dr) + / vect_get_scalar_dr_size (first_dr))) overrun_p = false; if (overrun_p && !can_overrun_p) { @@ -1804,14 +1808,15 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp, /* If there is a gap at the end of the group then these optimizations would access excess elements in the last iteration. */ bool would_overrun_p = (gap != 0); - /* If the access is aligned an overrun is fine, but only if the - overrun is not inside an unused vector (if the gap is as large - or larger than a vector). */ + /* An overrun is fine if the trailing elements are smaller than the + alignment boundary B. Every vector access will be a multiple of B + and so we are guaranteed to access a non-gap element in the + same B-sized block. 
*/ if (would_overrun_p - && gap < nunits - && aligned_access_p - (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))) + && gap < (vect_known_alignment_in_bytes (first_dr) + / vect_get_scalar_dr_size (first_dr))) would_overrun_p = false; + if (!STMT_VINFO_STRIDED_P (stmt_info) && (can_overrun_p || !would_overrun_p) && compare_step_with_zero (stmt) > 0) @@ -2351,7 +2356,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, TYPE_SIZE_UNIT (vectype)); } - align = TYPE_ALIGN_UNIT (vectype); + align = DR_TARGET_ALIGNMENT (dr); if (aligned_access_p (dr)) misalign = 0; else if (DR_MISALIGNMENT (dr) == -1) @@ -2404,7 +2409,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, TYPE_SIZE_UNIT (vectype)); } - align = TYPE_ALIGN_UNIT (vectype); + align = DR_TARGET_ALIGNMENT (dr); if (aligned_access_p (dr)) misalign = 0; else if (DR_MISALIGNMENT (dr) == -1) @@ -5553,25 +5558,25 @@ vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi, return true; } -/* A helper function to ensure data reference DR's base alignment - for STMT_INFO. */ +/* A helper function to ensure data reference DR's base alignment. 
*/ static void -ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr) +ensure_base_align (struct data_reference *dr) { if (!dr->aux) return; if (DR_VECT_AUX (dr)->base_misaligned) { - tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree base_decl = DR_VECT_AUX (dr)->base_decl; + unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT; + if (decl_in_symtab_p (base_decl)) - symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype)); + symtab_node::get (base_decl)->increase_alignment (align_base_to); else { - SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype)); + SET_DECL_ALIGN (base_decl, align_base_to); DECL_USER_ALIGN (base_decl) = 1; } DR_VECT_AUX (dr)->base_misaligned = false; @@ -5775,7 +5780,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Transform. */ - ensure_base_align (stmt_info, dr); + ensure_base_align (dr); if (memory_access_type == VMAT_GATHER_SCATTER) { @@ -6417,7 +6422,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, dataref_offset ? dataref_offset : build_int_cst (ref_type, 0)); - align = TYPE_ALIGN_UNIT (vectype); + align = DR_TARGET_ALIGNMENT (first_dr); if (aligned_access_p (first_dr)) misalign = 0; else if (DR_MISALIGNMENT (first_dr) == -1) @@ -6813,7 +6818,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Transform. */ - ensure_base_align (stmt_info, dr); + ensure_base_align (dr); if (memory_access_type == VMAT_GATHER_SCATTER) { @@ -7512,7 +7517,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, dataref_offset ? 
dataref_offset : build_int_cst (ref_type, 0)); - align = TYPE_ALIGN_UNIT (vectype); + align = DR_TARGET_ALIGNMENT (dr); if (alignment_support_scheme == dr_aligned) { gcc_assert (aligned_access_p (first_dr)); @@ -7555,11 +7560,12 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, ptr = copy_ssa_name (dataref_ptr); else ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); + unsigned int align = DR_TARGET_ALIGNMENT (first_dr); new_stmt = gimple_build_assign (ptr, BIT_AND_EXPR, dataref_ptr, build_int_cst (TREE_TYPE (dataref_ptr), - -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); + -(HOST_WIDE_INT) align)); vect_finish_stmt_generation (stmt, new_stmt, gsi); data_ref = build2 (MEM_REF, vectype, ptr, @@ -7581,8 +7587,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, ptr, build_int_cst - (TREE_TYPE (ptr), - -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); + (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); ptr = copy_ssa_name (ptr, new_stmt); gimple_assign_set_lhs (new_stmt, ptr); vect_finish_stmt_generation (stmt, new_stmt, gsi); @@ -7592,20 +7597,22 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, break; } case dr_explicit_realign_optimized: - if (TREE_CODE (dataref_ptr) == SSA_NAME) - new_temp = copy_ssa_name (dataref_ptr); - else - new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); - new_stmt = gimple_build_assign - (new_temp, BIT_AND_EXPR, dataref_ptr, - build_int_cst - (TREE_TYPE (dataref_ptr), - -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype))); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - data_ref - = build2 (MEM_REF, vectype, new_temp, - build_int_cst (ref_type, 0)); - break; + { + if (TREE_CODE (dataref_ptr) == SSA_NAME) + new_temp = copy_ssa_name (dataref_ptr); + else + new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); + unsigned int align = DR_TARGET_ALIGNMENT (first_dr); + new_stmt = gimple_build_assign + (new_temp, 
BIT_AND_EXPR, dataref_ptr, + build_int_cst (TREE_TYPE (dataref_ptr), + -(HOST_WIDE_INT) align)); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + data_ref + = build2 (MEM_REF, vectype, new_temp, + build_int_cst (ref_type, 0)); + break; + } default: gcc_unreachable (); } |