diff options
author | Feng Xue <fxue@os.amperecomputing.com> | 2024-06-26 22:02:53 +0800 |
---|---|---|
committer | Feng Xue <fxue@os.amperecomputing.com> | 2024-06-30 14:46:40 +0800 |
commit | 1ff5f8f8a05dd57620a1e2abbf87bd511b113cce (patch) | |
tree | 84363c2233731872e54bb648c2f0ed75cac1a771 | |
parent | 1bcfed4c52bb2410ea71bf6e4d46026e18461f84 (diff) | |
download | gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.zip gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.tar.gz gcc-1ff5f8f8a05dd57620a1e2abbf87bd511b113cce.tar.bz2 |
vect: Fix shift-by-induction for single-lane slp
Allow shift-by-induction for an SLP node when it has a single lane, which
aligns with the original loop-based handling.
2024-06-26 Feng Xue <fxue@os.amperecomputing.com>
gcc/
* tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
for single-lane slp node.
gcc/testsuite/
* gcc.dg/vect/vect-shift-6.c: New test.
* gcc.dg/vect/vect-shift-7.c: New test.
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 52 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 69 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.cc | 2 |
3 files changed, 122 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c new file mode 100644 index 0000000..277093b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_shift } */ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include <stdint.h> +#include "tree-vect.h" + +#define N 32 + +int32_t A[N]; +int32_t B[N]; + +#define FN(name) \ +__attribute__((noipa)) \ +void name(int32_t *a) \ +{ \ + for (int i = 0; i < N / 2; i++) \ + { \ + a[2 * i + 0] <<= i; \ + a[2 * i + 1] <<= i; \ + } \ +} + + +FN(foo_vec) + +#pragma GCC push_options +#pragma GCC optimize ("O0") +FN(foo_novec) +#pragma GCC pop_options + +int main () +{ + int i; + + check_vect (); + +#pragma GCC novector + for (i = 0; i < N; i++) + A[i] = B[i] = -(i + 1); + + foo_vec(A); + foo_novec(B); + + /* check results: */ +#pragma GCC novector + for (i = 0; i < N; i++) + if (A[i] != B[i]) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c new file mode 100644 index 0000000..6de3f39 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c @@ -0,0 +1,69 @@ +/* { dg-require-effective-target vect_shift } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "--param max-completely-peel-times=6" } */ + +#include <stdarg.h> +#include <stdint.h> +#include "tree-vect.h" + +#define N 16 +#define M 16 + +int32_t A[N]; +int32_t B[N]; + +#define FN(name) \ +__attribute__((noipa)) \ +void name(int32_t *a, int m) \ +{ \ + for (int i = 0; i < N / 2; i++) \ + { \ + int s1 = i; \ + int s2 = s1 + 1; \ + int32_t r1 = 0; \ + int32_t r2 = 7; \ + int32_t t1 = m; \ + \ + for (int j = 0; j < M; j++) \ + { \ + r1 += t1 << s1; \ + r2 += t1 << s2; \ + t1++; \ + s1++; \ + s2++; \ + } \ + \ + a[2 * i + 0] = r1; \ + a[2 * i + 1] = r2; \ + } \ +} + + +FN(foo_vec) + +#pragma GCC push_options +#pragma GCC optimize ("O0") 
+FN(foo_novec) +#pragma GCC pop_options + +int main () +{ + int i; + + check_vect (); + +#pragma GCC novector + for (i = 0; i < N; i++) + A[i] = B[i] = 0; + + foo_vec(A, 0); + foo_novec(B, 0); + + /* check results: */ +#pragma GCC novector + for (i = 0; i < N; i++) + if (A[i] != B[i]) + abort (); + + return 0; +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 7b889f3..aab3aa5 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -6175,7 +6175,7 @@ vectorizable_shift (vec_info *vinfo, if ((dt[1] == vect_internal_def || dt[1] == vect_induction_def || dt[1] == vect_nested_cycle) - && !slp_node) + && (!slp_node || SLP_TREE_LANES (slp_node) == 1)) scalar_shift_arg = false; else if (dt[1] == vect_constant_def || dt[1] == vect_external_def |