aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-02-01 14:17:07 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-02-01 14:17:07 +0000
commit3ae129323d150621d216fbbcdeebf033ef82416f (patch)
tree8f337d98834feaf4ab484aa6f606a11157bea238 /gcc
parentd677263e6c6ce0b40ca791e97f7b2f4ab0718f03 (diff)
downloadgcc-3ae129323d150621d216fbbcdeebf033ef82416f.zip
gcc-3ae129323d150621d216fbbcdeebf033ef82416f.tar.gz
gcc-3ae129323d150621d216fbbcdeebf033ef82416f.tar.bz2
Use range info in split_constant_offset (PR 81635)

This patch implements the original suggestion for fixing PR 81635:
use range info in split_constant_offset to see whether a conversion
of a wrapping type can be split.  The range info problem described in:

  https://gcc.gnu.org/ml/gcc-patches/2017-08/msg01002.html

seems to have been fixed.

The patch is part 1.  There needs to be a follow-on patch to handle:

  for (unsigned int i = 0; i < n; i += 4)
    {
      ...[i + 2]...
      ...[i + 3]...

which the old SCEV test handles, but which the range check doesn't.
At the moment we record that the low two bits of "i" are clear,
but we still end up with a maximum range of 0xffffffff rather than
0xfffffffc.

2018-01-31  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	PR tree-optimization/81635
	* tree-data-ref.c (split_constant_offset_1): For types that
	wrap on overflow, try to use range info to prove that wrapping
	cannot occur.

gcc/testsuite/
	PR tree-optimization/81635
	* gcc.dg/vect/bb-slp-pr81635-1.c: New test.
	* gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.

From-SVN: r257296
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog7
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c92
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c64
-rw-r--r--gcc/tree-data-ref.c39
5 files changed, 206 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8cec738..e62b93d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
+ PR tree-optimization/81635
+ * tree-data-ref.c (split_constant_offset_1): For types that
+ wrap on overflow, try to use range info to prove that wrapping
+ cannot occur.
+
2018-02-01 Renlin Li <renlin.li@arm.com>
PR target/83370
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8fb5c32..3f2f447 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+ PR tree-optimization/81635
+ * gcc.dg/vect/bb-slp-pr81635-1.c: New test.
+ * gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.
+
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
PR target/83370
* gcc.target/aarch64/pr83370.c: New.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
new file mode 100644
index 0000000..f024dc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target lp64 } */
+
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 500; i += 6)
+ for (unsigned int j = 0; j < 500; j += 4)
+ {
+ double a = q[j] + p[i];
+ double b = q[j + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+void
+f5 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+double p[1000];
+double q[1000];
+
+void
+f6 (int n)
+{
+ for (unsigned int i = 0; i < n; i += 4)
+ {
+ double a = q[i] + p[i];
+ double b = q[i + 1] + p[i + 1];
+ q[i] = a;
+ q[i + 1] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
new file mode 100644
index 0000000..11e8f0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fno-tree-loop-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+double p[1000];
+double q[1000];
+
+void
+f1 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < ~0U - 4; i += 4)
+ {
+ double a = q[i + 2] + p[i + 2];
+ double b = q[i + 3] + p[i + 3];
+ q[i + 2] = a;
+ q[i + 3] = b;
+ }
+}
+
+void
+f2 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < ~0U - 3; i += 4)
+ {
+ double a = q[i + 4] + p[i + 4];
+ double b = q[i + 5] + p[i + 5];
+ q[i + 4] = a;
+ q[i + 5] = b;
+ }
+}
+
+void
+f3 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 0; i < 1000; i += 4)
+ {
+ double a = q[i - 2] + p[i - 2];
+ double b = q[i - 1] + p[i - 1];
+ q[i - 2] = a;
+ q[i - 1] = b;
+ }
+}
+
+void
+f4 (double *p, double *q)
+{
+ p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
+ q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
+ for (unsigned int i = 2; i < 1000; i += 4)
+ {
+ double a = q[i - 4] + p[i - 4];
+ double b = q[i - 3] + p[i - 3];
+ q[i - 4] = a;
+ q[i - 3] = b;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index b5c0b7f..f3070d3 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -705,11 +705,46 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
and the outer precision is at least as large as the inner. */
tree itype = TREE_TYPE (op0);
if ((POINTER_TYPE_P (itype)
- || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
{
- split_constant_offset (op0, &var0, off);
+ if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+ {
+ /* Split the unconverted operand and try to prove that
+ wrapping isn't a problem. */
+ tree tmp_var, tmp_off;
+ split_constant_offset (op0, &tmp_var, &tmp_off);
+
+ /* See whether we have an SSA_NAME whose range is known
+ to be [A, B]. */
+ if (TREE_CODE (tmp_var) != SSA_NAME)
+ return false;
+ wide_int var_min, var_max;
+ if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+ return false;
+
+ /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+ is known to be [A + TMP_OFF, B + TMP_OFF], with all
+ operations done in ITYPE. The addition must overflow
+ at both ends of the range or at neither. */
+ bool overflow[2];
+ signop sgn = TYPE_SIGN (itype);
+ unsigned int prec = TYPE_PRECISION (itype);
+ wide_int woff = wi::to_wide (tmp_off, prec);
+ wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+ wi::add (var_max, woff, sgn, &overflow[1]);
+ if (overflow[0] != overflow[1])
+ return false;
+
+ /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */
+ widest_int diff = (widest_int::from (op0_min, sgn)
+ - widest_int::from (var_min, sgn));
+ var0 = tmp_var;
+ *off = wide_int_to_tree (ssizetype, diff);
+ }
+ else
+ split_constant_offset (op0, &var0, off);
*var = fold_convert (type, var0);
return true;
}