aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Feng Xue <fxue@os.amperecomputing.com> 2024-06-26 22:02:53 +0800
committer: Feng Xue <fxue@os.amperecomputing.com> 2024-06-30 14:46:40 +0800
commit: 1ff5f8f8a05dd57620a1e2abbf87bd511b113cce (patch)
tree: 84363c2233731872e54bb648c2f0ed75cac1a771
parent: 1bcfed4c52bb2410ea71bf6e4d46026e18461f84 (diff)
download: gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.zip
          gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.tar.gz
          gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.tar.bz2
vect: Fix shift-by-induction for single-lane slp
Allow shift-by-induction for an SLP node when it has a single lane, which
aligns with the original loop-based handling.

2024-06-26  Feng Xue  <fxue@os.amperecomputing.com>

gcc/
	* tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
	for single-lane slp node.

gcc/testsuite/
	* gcc.dg/vect/vect-shift-6.c: New test.
	* gcc.dg/vect/vect-shift-7.c: New test.
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-shift-6.c  52
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-shift-7.c  69
-rw-r--r--  gcc/tree-vect-stmts.cc                     2
3 files changed, 122 insertions, 1 deletion
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
new file mode 100644
index 0000000..277093b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 32
+
+int32_t A[N];
+int32_t B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int32_t *a) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ a[2 * i + 0] <<= i; \
+ a[2 * i + 1] <<= i; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = -(i + 1);
+
+ foo_vec(A);
+ foo_novec(B);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
new file mode 100644
index 0000000..6de3f39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param max-completely-peel-times=6" } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 16
+#define M 16
+
+int32_t A[N];
+int32_t B[N];
+
+#define FN(name) \
+__attribute__((noipa)) \
+void name(int32_t *a, int m) \
+{ \
+ for (int i = 0; i < N / 2; i++) \
+ { \
+ int s1 = i; \
+ int s2 = s1 + 1; \
+ int32_t r1 = 0; \
+ int32_t r2 = 7; \
+ int32_t t1 = m; \
+ \
+ for (int j = 0; j < M; j++) \
+ { \
+ r1 += t1 << s1; \
+ r2 += t1 << s2; \
+ t1++; \
+ s1++; \
+ s2++; \
+ } \
+ \
+ a[2 * i + 0] = r1; \
+ a[2 * i + 1] = r2; \
+ } \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ A[i] = B[i] = 0;
+
+ foo_vec(A, 0);
+ foo_novec(B, 0);
+
+ /* check results: */
+#pragma GCC novector
+ for (i = 0; i < N; i++)
+ if (A[i] != B[i])
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7b889f3..aab3aa5 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6175,7 +6175,7 @@ vectorizable_shift (vec_info *vinfo,
if ((dt[1] == vect_internal_def
|| dt[1] == vect_induction_def
|| dt[1] == vect_nested_cycle)
- && !slp_node)
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
scalar_shift_arg = false;
else if (dt[1] == vect_constant_def
|| dt[1] == vect_external_def