author | Richard Biener <rguenther@suse.de> | 2025-05-06 13:29:42 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2025-05-08 08:36:53 +0200 |
commit | 9e85d056cd15befffb39d2f84902d21eda4d98eb (patch) | |
tree | eceb75a1d9b7dd19f05caba69246546688a4bca5 | |
parent | da377e7ebf84a05943fb768eaeb7d682dee865fa (diff) | |
tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP
The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
code, which for the case of VF > 1 currently relies on dataref analysis
that assumes consecutive accesses.  But code generation advances
by DR_STEP between iterations, which requires us to verify that the
individual DR_STEP preserves the alignment rather than only VF * DR_STEP.
This allows us to use vector aligned accesses in some cases.
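As a side note, here is a minimal standalone sketch of the condition the patch adds (this is not GCC code; the helper name and the numbers are illustrative only): a constant step preserves the target alignment of the access only when the step's known alignment is a multiple of that target alignment, which is what the added multiple_p (DR_STEP_ALIGNMENT, DR_TARGET_ALIGNMENT) test checks before trusting the dataref misalignment.

```c
/* Standalone sketch, not GCC code: models the per-step alignment check.
   step_alignment stands in for DR_STEP_ALIGNMENT (largest power of two
   known to divide DR_STEP), target_alignment for DR_TARGET_ALIGNMENT.  */
#include <stdbool.h>
#include <stdio.h>

static bool
step_preserves_alignment (unsigned step_alignment, unsigned target_alignment)
{
  /* Advancing by the step keeps a target-aligned address target-aligned
     only if the step is a multiple of the target alignment.  */
  return step_alignment % target_alignment == 0;
}

int
main (void)
{
  /* Step only known to be 8-byte aligned, target alignment 16: one advance
     can turn a 16-byte aligned address into an 8-byte aligned one, so the
     access must be treated as unaligned (mis_align = -1 in the patch).  */
  printf ("%d\n", step_preserves_alignment (8, 16));   /* 0: not preserved */
  /* Step a multiple of 32 keeps 16-byte alignment on every advance.  */
  printf ("%d\n", step_preserves_alignment (32, 16));  /* 1: preserved */
  return 0;
}
```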
PR tree-optimization/119589
PR tree-optimization/119586
PR tree-optimization/119155
* tree-vect-stmts.cc (vectorizable_store): Verify
DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
VF > 1 and VMAT_STRIDED_SLP. Use vector aligned accesses when
we can.
(vectorizable_load): Likewise.
-rw-r--r-- | gcc/tree-vect-stmts.cc | 47 |
1 file changed, 34 insertions, 13 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index af7114d..a8762ba 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
 	  if (n == const_nunits)
 	    {
 	      int mis_align = dr_misalignment (first_dr_info, vectype);
+	      /* With VF > 1 we advance the DR by step, if that is constant
+		 and only aligned when performed VF times, DR alignment
+		 analysis can analyze this as aligned since it assumes
+		 contiguous accesses.  But that is not how we code generate
+		 here, so adjust for this.  */
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
 						 mis_align);
@@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
 	      ltype = build_vector_type (elem_type, n);
 	      lvectype = vectype;
 	      int mis_align = dr_misalignment (first_dr_info, ltype);
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, ltype,
 						 mis_align);
@@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
 	    }
 	}
       unsigned align;
-      /* ??? We'd want to use
-	 if (alignment_support_scheme == dr_aligned)
-	   align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-	 since doing that is what we assume we can in the above checks.
-	 But this interferes with groups with gaps where for example
-	 VF == 2 makes the group in the unrolled loop aligned but the
-	 fact that we advance with step between the two subgroups
-	 makes the access to the second unaligned.  See PR119586.
-	 We have to anticipate that here or adjust code generation to
-	 avoid the misaligned loads by means of permutations.  */
-      align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+      if (alignment_support_scheme == dr_aligned)
+	align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+      else
+	align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
       /* Alignment is at most the access size if we do multiple stores.  */
       if (nstores > 1)
 	align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
 	  if (n == const_nunits)
 	    {
 	      int mis_align = dr_misalignment (first_dr_info, vectype);
+	      /* With VF > 1 we advance the DR by step, if that is constant
+		 and only aligned when performed VF times, DR alignment
+		 analysis can analyze this as aligned since it assumes
+		 contiguous accesses.  But that is not how we code generate
+		 here, so adjust for this.  */
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
 						 mis_align);
@@ -10838,6 +10853,10 @@ vectorizable_load (vec_info *vinfo,
 	      if (VECTOR_TYPE_P (ptype))
 		{
 		  mis_align = dr_misalignment (first_dr_info, ptype);
+		  if (maybe_gt (vf, 1u)
+		      && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				      DR_TARGET_ALIGNMENT (first_dr_info)))
+		    mis_align = -1;
 		  dr_align
 		    = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
 						     mis_align);
@@ -10857,8 +10876,10 @@ vectorizable_load (vec_info *vinfo,
 	    }
 	}
       unsigned align;
-      /* ??? The above is still wrong, see vectorizable_store.  */
-      align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+      if (alignment_support_scheme == dr_aligned)
+	align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+      else
+	align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
       /* Alignment is at most the access size if we do multiple loads.  */
       if (nloads > 1)
 	align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
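For context, a hypothetical testcase sketch of the kind of access pattern involved (not taken from the PRs; whether the vectorizer actually classifies it as VMAT_STRIDED_SLP and emits an aligned vector store depends on the target and cost model): the group step is a multiple of a 16-byte vector alignment, so with the patch each per-iteration advance is known to preserve the target alignment.

```c
/* Hypothetical example, not from the PRs: a store group with a gap.
   Each iteration writes two adjacent doubles out of a 4-double row, so
   the data-ref step is sizeof (struct S) == 32 bytes.  32 is a multiple
   of a 16-byte vector alignment, so advancing by the step keeps the
   access aligned; with a 24-byte step it would not be.  */
struct S { double x, y, z, w; };
struct S a[128] __attribute__ ((aligned (16)));

void
foo (void)
{
  for (int i = 0; i < 128; i++)
    {
      a[i].x = 1.0;
      a[i].y = 2.0;
    }
}
```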