tree-optimization/104112 - add check for vect epilogue reduc reuse

This adds a missing check for the availability of intermediate vector types required to re-use the accumulator of a vectorized reduction in the vectorized epilogue. For SVE and VNx2DF vs V2DF with -msve-vector-bits=512, for example, V4DF is not available. In addition to that we have to verify that the reduction operation is supported; otherwise, for example on i?86, we get vector code that is later decomposed again by vector lowering when trying to use a V2HI epilogue for a V8HI reduction on a target without TARGET_MMX_WITH_SSE.

It might be that we want -Wvector-operation-performance for all vect.exp tests, but that seems to have existing regressions.

2022-01-19  Richard Biener  <rguenther@suse.de>

    PR tree-optimization/104112
    * tree-vect-loop.cc (vect_find_reusable_accumulator): Check for required intermediate vector types.

    * gcc.dg/vect/pr104112-1.c: New testcase.
    * gcc.dg/vect/pr104112-2.c: New testcase.
author: Richard Biener <rguenther@suse.de> 2022-01-19 12:31:30 +0100
committer: Richard Biener <rguenther@suse.de> 2022-01-19 14:55:15 +0100
commit: 5b6788ae5a365aecd6776f563e44022acb93a57a (patch)
tree: bee81495b90c4dc66434d00a6e3ffe311366d03c /gcc
parent: 68aa3c08ef1c33d9ad1263f4b939638920de9f11 (diff)
download: gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.zip
gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.tar.gz
gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.tar.bz2
3 files changed, 43 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-1.c b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
new file mode 100644
index 0000000..84e69b8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104112-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=512" { target aarch64-*-* } } */
+
+void
+boom(int n, double *a, double *x)  /* Updates x[n] down to x[1] in place, reading a[] and x[].  */
+{
+  int i, j;
+  double temp;  /* Running sum for the current j.  */
+
+  for (j = n; j >= 1; --j)  /* Outer loop: j counts down from n to 1.  */
+    {
+      temp = x[j];  /* Seed the sum with the current value of x[j].  */
+      for (i = j - 1; i >= 1; --i)  /* Inner loop: i counts down from j - 1 to 1.  */
+	temp += a[i + j] * x[i];  /* Accumulate the product a[i + j] * x[i] into temp.  */
+      x[j] = temp;  /* Write the accumulated sum back to x[j].  */
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-2.c b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
new file mode 100644
index 0000000..7469b3c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104112-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* Diagnose vector ops that are later decomposed.  */
+/* { dg-additional-options "-Wvector-operation-performance" } */
+
+unsigned short foo (unsigned short *a, int n)  /* Returns the sum of a[0] .. a[n-1] as an unsigned short.  */
+{
+  unsigned short sum = 0;  /* Accumulator; same type as the array elements.  */
+  for (int i = 0; i < n; ++i)  /* Visit every element in index order.  */
+    sum += a[i];  /* Add the element; the result is stored back into an unsigned short.  */
+  return sum;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0fe3529..0b2785a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4979,9 +4979,22 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
   /* Handle the case where we can reduce wider vectors to narrower ones.  */
   tree vectype = STMT_VINFO_VECTYPE (reduc_info);
   tree old_vectype = TREE_TYPE (accumulator->reduc_input);
+  unsigned HOST_WIDE_INT m;
   if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
-			    TYPE_VECTOR_SUBPARTS (vectype)))
+			    TYPE_VECTOR_SUBPARTS (vectype), &m))
     return false;
+  /* Check the intermediate vector types are available.  */
+  while (m > 2)
+    {
+      m /= 2;
+      tree intermediate_vectype = get_related_vectype_for_scalar_type
+	(TYPE_MODE (vectype), TREE_TYPE (vectype),
+	 exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
+      if (!intermediate_vectype
+	  || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
+				    intermediate_vectype))
+	return false;
+    }
 
   /* Non-SLP reductions might apply an adjustment after the reduction
      operation, in order to simplify the initialization of the accumulator.
author	Richard Biener <rguenther@suse.de>	2022-01-19 12:31:30 +0100
committer	Richard Biener <rguenther@suse.de>	2022-01-19 14:55:15 +0100
commit	5b6788ae5a365aecd6776f563e44022acb93a57a (patch)
tree	bee81495b90c4dc66434d00a6e3ffe311366d03c /gcc
parent	68aa3c08ef1c33d9ad1263f4b939638920de9f11 (diff)
download	gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.zip gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.tar.gz gcc-5b6788ae5a365aecd6776f563e44022acb93a57a.tar.bz2