diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-05-24 12:34:18 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-05-24 12:34:18 +0000 |
commit | c453ccc2335bf4267a154c9385eb50a8c45235a1 (patch) | |
tree | a370c4868f5adf7c1f2a37c67c0b608933c93782 | |
parent | 0c08e1f85d96dd50de4289f05beb87b83b6d1d83 (diff) | |
download | gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.zip gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.tar.gz gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.tar.bz2 |
Use canonicalize_math_after_vectorization_p for FMA folds
The folds in r260348 kicked in before vectorisation, which hurts
for two reasons:
(1) the current suboptimal handling of nothrow meant that we could
    drop the flag early and so prevent if-conversion
(2) some architectures provide more scalar forms than vector forms
    (true for Advanced SIMD)
Reason (1) is a bug in itself that needs to be fixed eventually, but
delaying the folds is still needed for reason (2).
2018-05-24 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* match.pd: Delay FMA folds until after vectorization.
gcc/testsuite/
* gcc.dg/vect/vect-fma-1.c: New test.
From-SVN: r260639
-rw-r--r-- | gcc/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/match.pd | 95 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-fma-1.c | 58 |
4 files changed, 114 insertions, 47 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9c7a386..1500fc0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2018-05-24 Richard Sandiford <richard.sandiford@linaro.org> + + * match.pd: Delay FMA folds until after vectorization. + 2018-05-24 Andre Vieira <andre.simoesdiasvieira@arm.com> PR target/83009 diff --git a/gcc/match.pd b/gcc/match.pd index 2f4c5e6..50f4c88 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4703,59 +4703,60 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wi::to_wide (@ipos) + isize)) (BIT_FIELD_REF @0 @rsize @rpos))))) -(for fmas (FMA) +(if (canonicalize_math_after_vectorization_p ()) + (for fmas (FMA) + (simplify + (fmas:c (negate @0) @1 @2) + (IFN_FNMA @0 @1 @2)) + (simplify + (fmas @0 @1 (negate @2)) + (IFN_FMS @0 @1 @2)) + (simplify + (fmas:c (negate @0) @1 (negate @2)) + (IFN_FNMS @0 @1 @2)) + (simplify + (negate (fmas@3 @0 @1 @2)) + (if (single_use (@3)) + (IFN_FNMS @0 @1 @2)))) + (simplify - (fmas:c (negate @0) @1 @2) + (IFN_FMS:c (negate @0) @1 @2) + (IFN_FNMS @0 @1 @2)) + (simplify + (IFN_FMS @0 @1 (negate @2)) + (IFN_FMA @0 @1 @2)) + (simplify + (IFN_FMS:c (negate @0) @1 (negate @2)) (IFN_FNMA @0 @1 @2)) (simplify - (fmas @0 @1 (negate @2)) - (IFN_FMS @0 @1 @2)) + (negate (IFN_FMS@3 @0 @1 @2)) + (if (single_use (@3)) + (IFN_FNMA @0 @1 @2))) + + (simplify + (IFN_FNMA:c (negate @0) @1 @2) + (IFN_FMA @0 @1 @2)) (simplify - (fmas:c (negate @0) @1 (negate @2)) + (IFN_FNMA @0 @1 (negate @2)) (IFN_FNMS @0 @1 @2)) (simplify - (negate (fmas@3 @0 @1 @2)) + (IFN_FNMA:c (negate @0) @1 (negate @2)) + (IFN_FMS @0 @1 @2)) + (simplify + (negate (IFN_FNMA@3 @0 @1 @2)) (if (single_use (@3)) - (IFN_FNMS @0 @1 @2)))) + (IFN_FMS @0 @1 @2))) -(simplify - (IFN_FMS:c (negate @0) @1 @2) - (IFN_FNMS @0 @1 @2)) -(simplify - (IFN_FMS @0 @1 (negate @2)) - (IFN_FMA @0 @1 @2)) -(simplify - (IFN_FMS:c (negate @0) @1 (negate @2)) - (IFN_FNMA @0 @1 @2)) -(simplify - (negate (IFN_FMS@3 @0 @1 @2)) + (simplify + (IFN_FNMS:c (negate @0) @1 @2) + (IFN_FMS @0 
@1 @2)) + (simplify + (IFN_FNMS @0 @1 (negate @2)) + (IFN_FNMA @0 @1 @2)) + (simplify + (IFN_FNMS:c (negate @0) @1 (negate @2)) + (IFN_FMA @0 @1 @2)) + (simplify + (negate (IFN_FNMS@3 @0 @1 @2)) (if (single_use (@3)) - (IFN_FNMA @0 @1 @2))) - -(simplify - (IFN_FNMA:c (negate @0) @1 @2) - (IFN_FMA @0 @1 @2)) -(simplify - (IFN_FNMA @0 @1 (negate @2)) - (IFN_FNMS @0 @1 @2)) -(simplify - (IFN_FNMA:c (negate @0) @1 (negate @2)) - (IFN_FMS @0 @1 @2)) -(simplify - (negate (IFN_FNMA@3 @0 @1 @2)) - (if (single_use (@3)) - (IFN_FMS @0 @1 @2))) - -(simplify - (IFN_FNMS:c (negate @0) @1 @2) - (IFN_FMS @0 @1 @2)) -(simplify - (IFN_FNMS @0 @1 (negate @2)) - (IFN_FNMA @0 @1 @2)) -(simplify - (IFN_FNMS:c (negate @0) @1 (negate @2)) - (IFN_FMA @0 @1 @2)) -(simplify - (negate (IFN_FNMS@3 @0 @1 @2)) - (if (single_use (@3)) - (IFN_FMA @0 @1 @2))) + (IFN_FMA @0 @1 @2)))) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ee97792..a04a327 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2018-05-24 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.dg/vect/vect-fma-1.c: New test. + 2018-05-24 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> * gcc.dg/tree-prof/update-loopch.c: Fix dumpfile name in diff --git a/gcc/testsuite/gcc.dg/vect/vect-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c new file mode 100644 index 0000000..6b6b4f7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target scalar_all_fma } */ + +#include "tree-vect.h" + +#define N (VECTOR_BITS * 11 / 64 + 3) + +#define DEF(INV) \ + void __attribute__ ((noipa)) \ + f_##INV (double *restrict a, double *restrict b, \ + double *restrict c, double *restrict d) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + double fma = __builtin_fma (mb, mc, md); \ + a[i] = (INV & 4 ? 
-fma : fma); \ + } \ + } + +#define TEST(INV) \ + { \ + f_##INV (a, b, c, d); \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + double fma = __builtin_fma (mb, mc, md); \ + double expected = (INV & 4 ? -fma : fma); \ + if (a[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +#define FOR_EACH_INV(T) \ + T (0) T (1) T (2) T (3) T (4) T (5) T (6) T (7) + +FOR_EACH_INV (DEF) + +int +main (void) +{ + double a[N], b[N], c[N], d[N]; + for (int i = 0; i < N; ++i) + { + b[i] = i % 17; + c[i] = i % 9 + 11; + d[i] = i % 13 + 14; + asm volatile ("" ::: "memory"); + } + FOR_EACH_INV (TEST) + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 8 "vect" { target vect_double } } } */ |