author | Richard Biener <rguenther@suse.de> | 2025-05-06 13:29:42 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2025-05-08 08:36:53 +0200 |
commit | 9e85d056cd15befffb39d2f84902d21eda4d98eb (patch) | |
tree | eceb75a1d9b7dd19f05caba69246546688a4bca5 | |
parent | da377e7ebf84a05943fb768eaeb7d682dee865fa (diff) | |
tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP
The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
code, which for the case of VF > 1 currently relies on dataref analysis
that assumes consecutive accesses.  But code generation advances
by DR_STEP between iterations, which requires us to verify that the
individual DR_STEP preserves the alignment rather than only VF * DR_STEP.
This allows us to use vector aligned accesses in some cases.
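As a side note, here is a minimal standalone sketch of the condition the patch adds (this is not GCC code; the helper name and the numbers are illustrative only): a constant step preserves the target alignment of the access only when the step's known alignment is a multiple of that target alignment, which is what the added multiple_p (DR_STEP_ALIGNMENT, DR_TARGET_ALIGNMENT) test checks before trusting the dataref misalignment.

```c
/* Standalone sketch, not GCC code: models the per-step alignment check.
   step_alignment stands in for DR_STEP_ALIGNMENT (largest power of two
   known to divide DR_STEP), target_alignment for DR_TARGET_ALIGNMENT.  */
#include <stdbool.h>
#include <stdio.h>

static bool
step_preserves_alignment (unsigned step_alignment, unsigned target_alignment)
{
  /* Advancing by the step keeps a target-aligned address target-aligned
     only if the step is a multiple of the target alignment.  */
  return step_alignment % target_alignment == 0;
}

int
main (void)
{
  /* Step only known to be 8-byte aligned, target alignment 16: one advance
     can turn a 16-byte aligned address into an 8-byte aligned one, so the
     access must be treated as unaligned (mis_align = -1 in the patch).  */
  printf ("%d\n", step_preserves_alignment (8, 16));   /* 0: not preserved */
  /* Step a multiple of 32 keeps 16-byte alignment on every advance.  */
  printf ("%d\n", step_preserves_alignment (32, 16));  /* 1: preserved */
  return 0;
}
```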
PR tree-optimization/119589
PR tree-optimization/119586
PR tree-optimization/119155
* tree-vect-stmts.cc (vectorizable_store): Verify
DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
VF > 1 and VMAT_STRIDED_SLP. Use vector aligned accesses when
we can.
(vectorizable_load): Likewise.
-rw-r--r-- | gcc/tree-vect-stmts.cc | 47 |
1 file changed, 34 insertions, 13 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index af7114d..a8762ba 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
 	  if (n == const_nunits)
 	    {
 	      int mis_align = dr_misalignment (first_dr_info, vectype);
+	      /* With VF > 1 we advance the DR by step, if that is constant
+		 and only aligned when performed VF times, DR alignment
+		 analysis can analyze this as aligned since it assumes
+		 contiguous accesses.  But that is not how we code generate
+		 here, so adjust for this.  */
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
 						 mis_align);
@@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
 	      ltype = build_vector_type (elem_type, n);
 	      lvectype = vectype;
 	      int mis_align = dr_misalignment (first_dr_info, ltype);
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, ltype,
 						 mis_align);
@@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
 	    }
 	}
       unsigned align;
-      /* ??? We'd want to use
-	 if (alignment_support_scheme == dr_aligned)
-	   align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-	 since doing that is what we assume we can in the above checks.
-	 But this interferes with groups with gaps where for example
-	 VF == 2 makes the group in the unrolled loop aligned but the
-	 fact that we advance with step between the two subgroups
-	 makes the access to the second unaligned.  See PR119586.
-	 We have to anticipate that here or adjust code generation to
-	 avoid the misaligned loads by means of permutations.  */
-      align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+      if (alignment_support_scheme == dr_aligned)
+	align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+      else
+	align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
       /* Alignment is at most the access size if we do multiple stores.  */
       if (nstores > 1)
 	align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
 	  if (n == const_nunits)
 	    {
 	      int mis_align = dr_misalignment (first_dr_info, vectype);
+	      /* With VF > 1 we advance the DR by step, if that is constant
+		 and only aligned when performed VF times, DR alignment
+		 analysis can analyze this as aligned since it assumes
+		 contiguous accesses.  But that is not how we code generate
+		 here, so adjust for this.  */
+	      if (maybe_gt (vf, 1u)
+		  && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				  DR_TARGET_ALIGNMENT (first_dr_info)))
+		mis_align = -1;
 	      dr_alignment_support dr_align
 		= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
 						 mis_align);
@@ -10838,6 +10853,10 @@ vectorizable_load (vec_info *vinfo,
 	      if (VECTOR_TYPE_P (ptype))
 		{
 		  mis_align = dr_misalignment (first_dr_info, ptype);
+		  if (maybe_gt (vf, 1u)
+		      && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+				      DR_TARGET_ALIGNMENT (first_dr_info)))
+		    mis_align = -1;
 		  dr_align
 		    = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
 						     mis_align);
@@ -10857,8 +10876,10 @@ vectorizable_load (vec_info *vinfo,
 	    }
 	}
       unsigned align;
-      /* ??? The above is still wrong, see vectorizable_store.  */
-      align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+      if (alignment_support_scheme == dr_aligned)
+	align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+      else
+	align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
       /* Alignment is at most the access size if we do multiple loads.  */
       if (nloads > 1)
 	align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
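For context, a hypothetical testcase sketch of the kind of access pattern involved (not taken from the PRs; whether the vectorizer actually classifies it as VMAT_STRIDED_SLP and emits an aligned vector store depends on the target and cost model): the group step is a multiple of a 16-byte vector alignment, so with the patch each per-iteration advance is known to preserve the target alignment.

```c
/* Hypothetical example, not from the PRs: a store group with a gap.
   Each iteration writes two adjacent doubles out of a 4-double row, so
   the data-ref step is sizeof (struct S) == 32 bytes.  32 is a multiple
   of a 16-byte vector alignment, so advancing by the step keeps the
   access aligned; with a 24-byte step it would not be.  */
struct S { double x, y, z, w; };
struct S a[128] __attribute__ ((aligned (16)));

void
foo (void)
{
  for (int i = 0; i < 128; i++)
    {
      a[i].x = 1.0;
      a[i].y = 2.0;
    }
}
```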