Fix PEELING_FOR_NITERS calculation (PR 87288)

PEELING_FOR_GAPS now means "peel one iteration for the epilogue", in much the same way that PEELING_FOR_ALIGNMENT > 0 means "peel that number of iterations for the prologue". We weren't taking this into account when deciding whether we needed to peel further scalar iterations beyond the iterations for "gaps" and "alignment". Only the first test failed before the patch. The other two are just for completeness. 2018-09-20 Richard Sandiford <richard.sandiford@arm.com> gcc/ PR tree-optimization/87288 * tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS into account when determining PEELING_FOR_NITERS. gcc/testsuite/ PR tree-optimization/87288 * gcc.dg/vect/pr87288-1.c: New test. * gcc.dg/vect/pr87288-2.c: Likewise. * gcc.dg/vect/pr87288-3.c: Likewise. From-SVN: r264440
author: Richard Sandiford <richard.sandiford@arm.com> 2018-09-20 12:58:23 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-09-20 12:58:23 +0000
commit: 2d2ee18641557deba692c286cbc2d8751310f697 (patch)
tree: d8c4f81ba2a6850b20fcfc807b297787b0a8afe3 /gcc
parent: 508a909eca536f7f6a60af9bd7ecea761bd2e8f1 (diff)
download: gcc-2d2ee18641557deba692c286cbc2d8751310f697.zip
gcc-2d2ee18641557deba692c286cbc2d8751310f697.tar.gz
gcc-2d2ee18641557deba692c286cbc2d8751310f697.tar.bz2
6 files changed, 201 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index af5fddf1..f400284 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
 
+	PR tree-optimization/87288
+	* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
+	into account when determining PEELING_FOR_NITERS.
+
+2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
 	PR tree-optimization/86877
 	* tree-vect-loop.c (vect_analyze_loop_2): Call
 	vect_verify_datarefs_alignment.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ade95db..c7c8e90 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,12 @@
 2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
 
+	PR tree-optimization/87288
+	* gcc.dg/vect/pr87288-1.c: New test.
+	* gcc.dg/vect/pr87288-2.c: Likewise,
+	* gcc.dg/vect/pr87288-3.c: Likewise.
+
+2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>
+
 	PR tree-optimization/86877
 	* gfortran.dg/vect/vect-8-epilogue.F90: New test.
 
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-1.c b/gcc/testsuite/gcc.dg/vect/pr87288-1.c
new file mode 100644
index 0000000..0d0a70d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-1.c
@@ -0,0 +1,49 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+void __attribute__ ((noipa))
+run (int *restrict a, int *restrict b, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    {
+      a[i * 2] = b[i * 2] + count;
+      a[i * 2 + 1] = count;
+    }
+}
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  for (int i = 0; i <= MAX_COUNT; ++i)
+    {
+      a[i * 2 * N] = 999;
+      run (a, b, i);
+      check (a, i);
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-2.c b/gcc/testsuite/gcc.dg/vect/pr87288-2.c
new file mode 100644
index 0000000..e9ff9a0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-2.c
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)				\
+  void __attribute__ ((noipa))				\
+  run_##COUNT (int *restrict a, int *restrict b)	\
+  {							\
+    for (int i = 0; i < N * COUNT; ++i)			\
+      {							\
+	a[i * 2] = b[i * 2] + COUNT;			\
+	a[i * 2 + 1] = COUNT;				\
+      }							\
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr87288-3.c b/gcc/testsuite/gcc.dg/vect/pr87288-3.c
new file mode 100644
index 0000000..23f574c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr87288-3.c
@@ -0,0 +1,64 @@
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS / 32)
+#define MAX_COUNT 4
+
+#define RUN_COUNT(COUNT)				\
+  void __attribute__ ((noipa))				\
+  run_##COUNT (int *restrict a, int *restrict b)	\
+  {							\
+    for (int i = 0; i < N * COUNT + 1; ++i)		\
+      {							\
+	a[i * 2] = b[i * 2] + COUNT;			\
+	a[i * 2 + 1] = COUNT;				\
+      }							\
+  }
+
+RUN_COUNT (1)
+RUN_COUNT (2)
+RUN_COUNT (3)
+RUN_COUNT (4)
+
+void __attribute__ ((noipa))
+check (int *restrict a, int count)
+{
+  for (int i = 0; i < count * N + 1; ++i)
+    if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
+      __builtin_abort ();
+  if (a[count * 2 * N + 2] != 999)
+    __builtin_abort ();
+}
+
+int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
+
+int
+main (void)
+{
+  check_vect ();
+
+  for (int i = 0; i < N * MAX_COUNT + 1; ++i)
+    {
+      b[i * 2] = i * 41;
+      asm volatile ("" ::: "memory");
+    }
+
+  a[N * 2 + 2] = 999;
+  run_1 (a, b);
+  check (a, 1);
+
+  a[N * 4 + 2] = 999;
+  run_2 (a, b);
+  check (a, 2);
+
+  a[N * 6 + 2] = 999;
+  run_3 (a, b);
+  check (a, 3);
+
+  a[N * 8 + 2] = 999;
+  run_4 (a, b);
+  check (a, 4);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 70e9157..fdac10b 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2074,14 +2074,22 @@ start_over:
     /* The main loop handles all iterations.  */
     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
     {
-      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
-		       - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
+      /* Work out the (constant) number of iterations that need to be
+	 peeled for reasons other than niters.  */
+      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+	peel_niter += 1;
+      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
 		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
 	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
     }
   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	   /* ??? When peeling for gaps but not alignment, we could
+	      try to check whether the (variable) niters is known to be
+	      VF * N + 1.  That's something of a niche case though.  */
+	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
 	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
 	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
 		< (unsigned) exact_log2 (const_vf))
author	Richard Sandiford <richard.sandiford@arm.com>	2018-09-20 12:58:23 +0000
committer	Richard Sandiford <rsandifo@gcc.gnu.org>	2018-09-20 12:58:23 +0000
commit	2d2ee18641557deba692c286cbc2d8751310f697 (patch)
tree	d8c4f81ba2a6850b20fcfc807b297787b0a8afe3 /gcc
parent	508a909eca536f7f6a60af9bd7ecea761bd2e8f1 (diff)
download	gcc-2d2ee18641557deba692c286cbc2d8751310f697.zip gcc-2d2ee18641557deba692c286cbc2d8751310f697.tar.gz gcc-2d2ee18641557deba692c286cbc2d8751310f697.tar.bz2