Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                  7
-rw-r--r--  gcc/testsuite/ChangeLog                        6
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c  92
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c  64
-rw-r--r--  gcc/tree-data-ref.c                           39
5 files changed, 206 insertions(+), 2 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8cec738..e62b93d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * tree-data-ref.c (split_constant_offset_1): For types that
+ wrap on overflow, try to use range info to prove that wrapping
+ cannot occur.
+
2018-02-01 Renlin Li <renlin.li@arm.com>

 PR target/83370
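
The entry above concerns a subtle point: for an index type that wraps on
overflow, widening does not distribute over addition, so an address
computation such as (sizetype) (i + 1) cannot in general be rewritten as
(sizetype) i + 1.  A minimal standalone illustration in plain C (not part
of the patch; assumes an LP64 target, matching the lp64 requirement in
the tests below):

    #include <limits.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int i = UINT_MAX;
      /* The addition wraps to 0 before the widening conversion...  */
      unsigned long a = (unsigned long) (i + 1);   /* 0 */
      /* ...but does not wrap if the conversion happens first.  */
      unsigned long b = (unsigned long) i + 1;     /* 4294967296 */
      printf ("%lu %lu\n", a, b);
      return 0;
    }

With range information, the patch can prove that no value of the variable
operand makes the addition wrap, in which case the two forms agree.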
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8fb5c32..3f2f447 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>

+ PR tree-optimization/81635
+ * gcc.dg/vect/bb-slp-pr81635-1.c: New test.
+ * gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.
+
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
PR target/83370
* gcc.target/aarch64/pr83370.c: New.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
new file mode 100644
index 0000000..f024dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 500; i += 6)
+ for (unsigned int j = 0; j < 500; j += 4)
+ {
+ double a = q[j] + p[i];
+ double b = q[j + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f5 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+double p[1000];
+double q[1000];
+
+void
+f6 (int n)
+{
+ for (unsigned int i = 0; i < n; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
new file mode 100644
index 0000000..11e8f0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+double p[1000];
+double q[1000];
+
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 4] + p[i + 4];
+ double b = q[i + 5] + p[i + 5];
+ q[i + 4] = a;
+ q[i + 5] = b;
+ }
+}
+
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 4] + p[i - 4];
+ double b = q[i - 3] + p[i - 3];
+ q[i - 4] = a;
+ q[i - 3] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index b5c0b7f..f3070d3 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -705,11 +705,46 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
and the outer precision is at least as large as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ bool overflow[2];
+ signop sgn = TYPE_SIGN (itype);
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if (overflow[0] != overflow[1])
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off);
*var = fold_convert (type, var0);
return true;
}
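
The endpoint test in the new code works because, for a wrapping unsigned
type and a constant non-negative offset, the set of values in [A, B] for
which the addition wraps is always a trailing segment of the range, so it
is enough to test the two endpoints.  A standalone sketch of that
reasoning in plain C (the helper name is invented for illustration; the
patch performs the same test on wide_int values at the precision of
ITYPE, handling both signs via wi::add):

    #include <stdbool.h>
    #include <stdio.h>

    /* Return true if VAR + OFF wraps for every 32-bit unsigned VAR in
       [MIN, MAX] or for none of them.  Wrapping is monotone in VAR, so
       testing the endpoints suffices; this mirrors the "overflow at both
       ends of the range or at neither" test in the patch.  */
    static bool
    addition_wraps_uniformly (unsigned int min, unsigned int max,
                              unsigned int off)
    {
      bool wraps_at_min = min + off < min;
      bool wraps_at_max = max + off < max;
      return wraps_at_min == wraps_at_max;
    }

    int
    main (void)
    {
      /* Modelled on f3 in bb-slp-pr81635-1.c: i stays in roughly
         [0, ~0U - 4] and the offset is at most 3, so neither endpoint
         wraps and the split is safe.  Prints 1.  */
      printf ("%d\n", addition_wraps_uniformly (0, ~0U - 4, 3));
      /* Modelled on f2 in bb-slp-pr81635-2.c: the same range with an
         offset of 5 wraps only at the top, so the split must be
         rejected.  Prints 0.  */
      printf ("%d\n", addition_wraps_uniformly (0, ~0U - 4, 5));
      return 0;
    }

When the test passes, the difference OP0 - TMP_VAR is the same constant
across the whole range, which is exactly the DIFF value the patch
computes from the two range minima and stores in *OFF.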