author    Richard Biener <rguenther@suse.de>   2025-01-21 14:58:43 +0100
committer Richard Biener <rguenth@gcc.gnu.org> 2025-01-23 08:43:39 +0100
commit    7fffff1deb47a70ff804f0b2cce7be7e5fe8ba13 (patch)
tree      27d8d5cf5ad0573360b8b6bd5085010872534649
parent    2119c254606c58d247d3d0b92bf192780d375ddb (diff)
tree-optimization/118558 - fix alignment compute with VMAT_CONTIGUOUS_REVERSE
There are calls to dr_misalignment left that do not correct for the
offset (which is vector type dependent) when the stride is negative.
Notably vect_known_alignment_in_bytes does not allow such an offset to
be passed through, which the following adds (computing the offset in
vect_known_alignment_in_bytes would be possible as well, but the offset
can be shared, as seen).  Eventually this function could go away.

Without the correction, peeling for gaps was not considered and the
access was not shortened; applying it is what fixes the testcase on
x86_64.

	PR tree-optimization/118558

	* tree-vectorizer.h (vect_known_alignment_in_bytes): Pass
	through offset to dr_misalignment.
	* tree-vect-stmts.cc (get_group_load_store_type): Compute
	offset applied for negative stride and use it when querying
	alignment of accesses.
	(vectorizable_load): Likewise.

	* gcc.dg/vect/pr118558.c: New testcase.
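As a rough illustration of the offset the patch applies (a minimal
standalone sketch with assumed example values, not code from GCC): for
VMAT_CONTIGUOUS_REVERSE the vector access ends at DR_REF but begins
(nunits - 1) scalar elements before it, so the alignment query has to
be shifted by that negative amount.

    /* Sketch only: mirrors the arithmetic added by the patch, using
       assumed example values (a 2-lane vector of 8-byte elements).  */
    #include <cstdio>

    int main ()
    {
      const long nunits = 2;   /* stands in for TYPE_VECTOR_SUBPARTS (vectype) */
      const long dr_size = 8;  /* stands in for vect_get_scalar_dr_size (first_dr_info) */
      /* For a reversed contiguous access the load covers the bytes
	 [DR_REF + off, DR_REF + off + nunits * dr_size).  */
      long off = (nunits - 1) * -dr_size;
      std::printf ("offset for VMAT_CONTIGUOUS_REVERSE: %ld bytes\n", off);  /* -8 */
      return 0;
    }

With off = -8 bytes in this assumed example, an access whose DR_REF is
16-byte aligned is only 8-byte aligned at its true start, which is why
the misalignment query needs the offset.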
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr118558.c  15
-rw-r--r--  gcc/tree-vect-stmts.cc                24
-rw-r--r--  gcc/tree-vectorizer.h                  5
3 files changed, 35 insertions, 9 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr118558.c b/gcc/testsuite/gcc.dg/vect/pr118558.c
new file mode 100644
index 0000000..5483328
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118558.c
@@ -0,0 +1,15 @@
+#include "tree-vect.h"
+
+static unsigned long g_270[5][2] = {{123}};
+static short g_2312 = 0;
+int main()
+{
+ check_vect ();
+ int g_1168 = 0;
+ unsigned t = 4;
+ for (g_1168 = 3; g_1168 >= 0; g_1168 -= 1)
+ for (g_2312 = 0; g_2312 <= 1; g_2312 += 1)
+ t = g_270[g_1168][0];
+ if (t != 123) __builtin_abort();
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21fb5cf..c0550ac 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2198,14 +2198,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
" non-consecutive accesses\n");
return false;
}
+
+ unsigned HOST_WIDE_INT dr_size
+ = vect_get_scalar_dr_size (first_dr_info);
+ poly_int64 off = 0;
+ if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+
/* An overrun is fine if the trailing elements are smaller
than the alignment boundary B. Every vector access will
be a multiple of B and so we are guaranteed to access a
non-gap element in the same B-sized block. */
if (overrun_p
&& gap < (vect_known_alignment_in_bytes (first_dr_info,
- vectype)
- / vect_get_scalar_dr_size (first_dr_info)))
+ vectype, off) / dr_size))
overrun_p = false;
/* When we have a contiguous access across loop iterations
@@ -2230,7 +2236,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
by simply loading half of the vector only. Usually
the construction with an upper zero half will be elided. */
dr_alignment_support alss;
- int misalign = dr_misalignment (first_dr_info, vectype);
+ int misalign = dr_misalignment (first_dr_info, vectype, off);
tree half_vtype;
poly_uint64 remain;
unsigned HOST_WIDE_INT tem, num;
@@ -11991,8 +11997,14 @@ vectorizable_load (vec_info *vinfo,
tree ltype = vectype;
tree new_vtype = NULL_TREE;
unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+ unsigned HOST_WIDE_INT dr_size
+ = vect_get_scalar_dr_size (first_dr_info);
+ poly_int64 off = 0;
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
unsigned int vect_align
- = vect_known_alignment_in_bytes (first_dr_info, vectype);
+ = vect_known_alignment_in_bytes (first_dr_info, vectype,
+ off);
/* Try to use a single smaller load when we are about
to load excess elements compared to the unrolled
scalar loop. */
@@ -12013,9 +12025,7 @@ vectorizable_load (vec_info *vinfo,
scalar loop. */
;
else if (known_gt (vect_align,
- ((nunits - remain)
- * vect_get_scalar_dr_size
- (first_dr_info))))
+ ((nunits - remain) * dr_size)))
/* Aligned access to the gap area when there's
at least one element in it is OK. */
;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 79db02a..44d3a1d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2028,9 +2028,10 @@ known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
of DR_INFO is guaranteed to have. */
inline unsigned int
-vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
+vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
+ poly_int64 offset = 0)
{
- int misalignment = dr_misalignment (dr_info, vectype);
+ int misalignment = dr_misalignment (dr_info, vectype, offset);
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
else if (misalignment == 0)