diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2019-01-07 12:17:10 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2019-01-07 12:17:10 +0000 |
commit | f4bf2aabe36633d75852313caafe7efab71d5ba7 (patch) | |
tree | 4b1605d010ed5331fcd70d90790c5ce41dd2f7c2 /gcc | |
parent | 46c66a46aa33077bda821e0428cc7859945c04c8 (diff) | |
download | gcc-f4bf2aabe36633d75852313caafe7efab71d5ba7.zip gcc-f4bf2aabe36633d75852313caafe7efab71d5ba7.tar.gz gcc-f4bf2aabe36633d75852313caafe7efab71d5ba7.tar.bz2 |
[2/2] PR88598: Optimise reduc (bit_and)
This patch folds certain reductions of X & CST to X[I] & CST[I] if I is
the only nonzero element of CST. This includes the motivating case in
which CST[I] is -1.
We could do the same for REDUC_MAX on unsigned types, but I wasn't sure
that that special case was worth it.
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/88598
* tree.h (single_nonzero_element): Declare.
* tree.c (single_nonzero_element): New function.
* match.pd: Fold certain reductions of X & CST to X[I] & CST[I]
if I is the only nonzero element of CST.
gcc/testsuite/
PR tree-optimization/88598
* gcc.dg/vect/pr88598-1.c: New test.
* gcc.dg/vect/pr88598-2.c: Likewise.
* gcc.dg/vect/pr88598-3.c: Likewise.
* gcc.dg/vect/pr88598-4.c: Likewise.
* gcc.dg/vect/pr88598-5.c: Likewise.
* gcc.dg/vect/pr88598-6.c: Likewise.
From-SVN: r267646
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/match.pd | 16 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-1.c | 55 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-2.c | 55 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-3.c | 55 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-4.c | 51 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-5.c | 51 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr88598-6.c | 51 | ||||
-rw-r--r-- | gcc/tree.c | 32 | ||||
-rw-r--r-- | gcc/tree.h | 2 |
11 files changed, 386 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b3dcd52..e5f3ad23 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,14 @@ 2019-01-07 Richard Sandiford <richard.sandiford@arm.com> PR tree-optimization/88598 + * tree.h (single_nonzero_element): Declare. + * tree.c (single_nonzero_element): New function. + * match.pd: Fold certain reductions of X & CST to X[I] & CST[I] + if I is the only nonzero element of CST. + +2019-01-07 Richard Sandiford <richard.sandiford@arm.com> + + PR tree-optimization/88598 * tree.h (initializer_each_zero_or_onep): Declare. * tree.c (initializer_each_zero_or_onep): New function. (signed_or_unsigned_type_for): Handle float types too. diff --git a/gcc/match.pd b/gcc/match.pd index 4fe379f..60b12f9 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5251,3 +5251,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { wide_int_to_tree (sizetype, off); }) { swap_p ? @0 : @2; })) { rhs_tree; }))))))))) + +/* Fold REDUC (@0 & @1) -> @0[I] & @1[I] if element I is the only nonzero + element of @1. */ +(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR) + (simplify (reduc (view_convert? (bit_and @0 VECTOR_CST@1))) + (with { int i = single_nonzero_element (@1); } + (if (i >= 0) + (with { tree elt = vector_cst_elt (@1, i); + tree elt_type = TREE_TYPE (elt); + unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type)); + tree size = bitsize_int (elt_bits); + tree pos = bitsize_int (elt_bits * i); } + (view_convert + (bit_and:elt_type + (BIT_FIELD_REF:elt_type @0 { size; } { pos; }) + { elt; }))))))) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 707c95d..0dc5707 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,6 +1,16 @@ 2019-01-07 Richard Sandiford <richard.sandiford@arm.com> PR tree-optimization/88598 + * gcc.dg/vect/pr88598-1.c: New test. + * gcc.dg/vect/pr88598-2.c: Likewise. + * gcc.dg/vect/pr88598-3.c: Likewise. + * gcc.dg/vect/pr88598-4.c: Likewise. + * gcc.dg/vect/pr88598-5.c: Likewise. + * gcc.dg/vect/pr88598-6.c: Likewise. + +2019-01-07 Richard Sandiford <richard.sandiford@arm.com> + + PR tree-optimization/88598 * gcc.dg/pr88598-1.c: New test. * gcc.dg/pr88598-2.c: Likewise. * gcc.dg/pr88598-3.c: Likewise. diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-1.c b/gcc/testsuite/gcc.dg/vect/pr88598-1.c new file mode 100644 index 0000000..1d119ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-1.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +int a[N]; + +int __attribute__ ((noipa)) +f1 (void) +{ + int b[N] = { 15, 0, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int __attribute__ ((noipa)) +f2 (void) +{ + int b[N] = { 0, 31, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int __attribute__ ((noipa)) +f3 (void) +{ + int b[N] = { 0, 0, 0, -1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != (a[0] & 15) + || f2 () != (a[1] & 31) + || f3 () != a[N - 1]) + __builtin_abort (); + + return 0; +} + +/* ??? We need more constant folding for this to work with fully-masked + loops. */ +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-2.c b/gcc/testsuite/gcc.dg/vect/pr88598-2.c new file mode 100644 index 0000000..837cd5d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-2.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +int a[N]; + +int __attribute__ ((noipa)) +f1 (void) +{ + int b[N] = { 1, 0, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int __attribute__ ((noipa)) +f2 (void) +{ + int b[N] = { 0, 1, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int __attribute__ ((noipa)) +f3 (void) +{ + int b[N] = { 0, 0, 0, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != a[0] + || f2 () != a[1] + || f3 () != a[N - 1]) + __builtin_abort (); + + return 0; +} + +/* ??? We need more constant folding for this to work with fully-masked + loops. */ +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-3.c b/gcc/testsuite/gcc.dg/vect/pr88598-3.c new file mode 100644 index 0000000..ee034ec --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-3.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +float a[N]; + +float __attribute__ ((noipa)) +f1 (void) +{ + float b[N] = { 1, 0, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +float __attribute__ ((noipa)) +f2 (void) +{ + float b[N] = { 0, 1, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +float __attribute__ ((noipa)) +f3 (void) +{ + float b[N] = { 0, 0, 0, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != a[0] + || f2 () != a[1] + || f3 () != a[N - 1]) + __builtin_abort (); + + return 0; +} + +/* ??? We need more constant folding for this to work with fully-masked + loops. */ +/* { dg-final { scan-tree-dump-not {REDUC_PLUS} "optimized" { xfail aarch64_sve } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-4.c b/gcc/testsuite/gcc.dg/vect/pr88598-4.c new file mode 100644 index 0000000..b5cf08d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-4.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +int a[N]; + +int __attribute__ ((noipa)) +f1 (void) +{ + int b[N] = { 0, 15, 15, 15 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int __attribute__ ((noipa)) +f2 (void) +{ + int b[N] = { 0, 31, 0, 31 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int __attribute__ ((noipa)) +f3 (void) +{ + int b[N] = { -1, -1, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] & b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != (a[1] & 15) + (a[2] & 15) + (a[3] & 15) + || f2 () != (a[1] & 31) + (a[3] & 31) + || f3 () != a[0] + a[1]) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-5.c b/gcc/testsuite/gcc.dg/vect/pr88598-5.c new file mode 100644 index 0000000..b3b9843 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-5.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +int a[N]; + +int __attribute__ ((noipa)) +f1 (void) +{ + int b[N] = { 0, 1, 1, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int __attribute__ ((noipa)) +f2 (void) +{ + int b[N] = { 0, 1, 0, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int __attribute__ ((noipa)) +f3 (void) +{ + int b[N] = { 1, 1, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != a[1] + a[2] + a[3] + || f2 () != a[1] + a[3] + || f3 () != a[0] + a[1]) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr88598-6.c b/gcc/testsuite/gcc.dg/vect/pr88598-6.c new file mode 100644 index 0000000..f04e32d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr88598-6.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ffast-math -fdump-tree-optimized" } */ + +#include "tree-vect.h" + +#define N 4 + +float a[N]; + +float __attribute__ ((noipa)) +f1 (void) +{ + float b[N] = { 0, 1, 1, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +float __attribute__ ((noipa)) +f2 (void) +{ + float b[N] = { 0, 1, 0, 1 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +float __attribute__ ((noipa)) +f3 (void) +{ + float b[N] = { 1, 1, 0, 0 }, res = 0; + for (int i = 0; i < N; ++i) + res += a[i] * b[i]; + return res; +} + +int +main () +{ + check_vect (); + + for (int i = 0; i < N; ++i) + a[i] = 0xe0 + i; + + if (f1 () != a[1] + a[2] + a[3] + || f2 () != a[1] + a[3] + || f3 () != a[0] + a[1]) + __builtin_abort (); + + return 0; +} @@ -11340,6 +11340,38 @@ uniform_integer_cst_p (tree t) return NULL_TREE; } +/* If VECTOR_CST T has a single nonzero element, return the index of that + element, otherwise return -1. */ + +int +single_nonzero_element (const_tree t) +{ + unsigned HOST_WIDE_INT nelts; + unsigned int repeat_nelts; + if (VECTOR_CST_NELTS (t).is_constant (&nelts)) + repeat_nelts = nelts; + else if (VECTOR_CST_NELTS_PER_PATTERN (t) == 2) + { + nelts = vector_cst_encoded_nelts (t); + repeat_nelts = VECTOR_CST_NPATTERNS (t); + } + else + return -1; + + int res = -1; + for (unsigned int i = 0; i < nelts; ++i) + { + tree elt = vector_cst_elt (t, i); + if (!integer_zerop (elt) && !real_zerop (elt)) + { + if (res >= 0 || i >= repeat_nelts) + return -1; + res = i; + } + } + return res; +} + /* Build an empty statement at location LOC. */ tree @@ -4522,6 +4522,8 @@ extern tree uniform_vector_p (const_tree); extern tree uniform_integer_cst_p (tree); +extern int single_nonzero_element (const_tree); + /* Given a CONSTRUCTOR CTOR, return the element values as a vector. */ extern vec<tree, va_gc> *ctor_to_vec (tree); |