aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2018-05-24 12:34:18 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-05-24 12:34:18 +0000
commit c453ccc2335bf4267a154c9385eb50a8c45235a1 (patch)
tree a370c4868f5adf7c1f2a37c67c0b608933c93782
parent 0c08e1f85d96dd50de4289f05beb87b83b6d1d83 (diff)
download gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.zip
gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.tar.gz
gcc-c453ccc2335bf4267a154c9385eb50a8c45235a1.tar.bz2
Use canonicalize_math_after_vectorization_p for FMA folds
The folds in r260348 kicked in before vectorisation, which hurts
for two reasons:

(1) the current suboptimal handling of nothrow meant that we could
    drop the flag early and so prevent if-conversion

(2) some architectures provide more scalar forms than vector forms
    (true for Advanced SIMD)

(1) is a bug in itself that needs to be fixed eventually, but delaying
the folds is still needed for (2).

2018-05-24  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* match.pd: Delay FMA folds until after vectorization.

gcc/testsuite/
	* gcc.dg/vect/vect-fma-1.c: New test.

From-SVN: r260639
-rw-r--r-- gcc/ChangeLog 4
-rw-r--r-- gcc/match.pd 95
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-fma-1.c 58
4 files changed, 114 insertions, 47 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9c7a386..1500fc0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2018-05-24 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * match.pd: Delay FMA folds until after vectorization.
+
2018-05-24 Andre Vieira <andre.simoesdiasvieira@arm.com>
PR target/83009
diff --git a/gcc/match.pd b/gcc/match.pd
index 2f4c5e6..50f4c88 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4703,59 +4703,60 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
wi::to_wide (@ipos) + isize))
(BIT_FIELD_REF @0 @rsize @rpos)))))
-(for fmas (FMA)
+(if (canonicalize_math_after_vectorization_p ())
+ (for fmas (FMA)
+ (simplify
+ (fmas:c (negate @0) @1 @2)
+ (IFN_FNMA @0 @1 @2))
+ (simplify
+ (fmas @0 @1 (negate @2))
+ (IFN_FMS @0 @1 @2))
+ (simplify
+ (fmas:c (negate @0) @1 (negate @2))
+ (IFN_FNMS @0 @1 @2))
+ (simplify
+ (negate (fmas@3 @0 @1 @2))
+ (if (single_use (@3))
+ (IFN_FNMS @0 @1 @2))))
+
(simplify
- (fmas:c (negate @0) @1 @2)
+ (IFN_FMS:c (negate @0) @1 @2)
+ (IFN_FNMS @0 @1 @2))
+ (simplify
+ (IFN_FMS @0 @1 (negate @2))
+ (IFN_FMA @0 @1 @2))
+ (simplify
+ (IFN_FMS:c (negate @0) @1 (negate @2))
(IFN_FNMA @0 @1 @2))
(simplify
- (fmas @0 @1 (negate @2))
- (IFN_FMS @0 @1 @2))
+ (negate (IFN_FMS@3 @0 @1 @2))
+ (if (single_use (@3))
+ (IFN_FNMA @0 @1 @2)))
+
+ (simplify
+ (IFN_FNMA:c (negate @0) @1 @2)
+ (IFN_FMA @0 @1 @2))
(simplify
- (fmas:c (negate @0) @1 (negate @2))
+ (IFN_FNMA @0 @1 (negate @2))
(IFN_FNMS @0 @1 @2))
(simplify
- (negate (fmas@3 @0 @1 @2))
+ (IFN_FNMA:c (negate @0) @1 (negate @2))
+ (IFN_FMS @0 @1 @2))
+ (simplify
+ (negate (IFN_FNMA@3 @0 @1 @2))
(if (single_use (@3))
- (IFN_FNMS @0 @1 @2))))
+ (IFN_FMS @0 @1 @2)))
-(simplify
- (IFN_FMS:c (negate @0) @1 @2)
- (IFN_FNMS @0 @1 @2))
-(simplify
- (IFN_FMS @0 @1 (negate @2))
- (IFN_FMA @0 @1 @2))
-(simplify
- (IFN_FMS:c (negate @0) @1 (negate @2))
- (IFN_FNMA @0 @1 @2))
-(simplify
- (negate (IFN_FMS@3 @0 @1 @2))
+ (simplify
+ (IFN_FNMS:c (negate @0) @1 @2)
+ (IFN_FMS @0 @1 @2))
+ (simplify
+ (IFN_FNMS @0 @1 (negate @2))
+ (IFN_FNMA @0 @1 @2))
+ (simplify
+ (IFN_FNMS:c (negate @0) @1 (negate @2))
+ (IFN_FMA @0 @1 @2))
+ (simplify
+ (negate (IFN_FNMS@3 @0 @1 @2))
(if (single_use (@3))
- (IFN_FNMA @0 @1 @2)))
-
-(simplify
- (IFN_FNMA:c (negate @0) @1 @2)
- (IFN_FMA @0 @1 @2))
-(simplify
- (IFN_FNMA @0 @1 (negate @2))
- (IFN_FNMS @0 @1 @2))
-(simplify
- (IFN_FNMA:c (negate @0) @1 (negate @2))
- (IFN_FMS @0 @1 @2))
-(simplify
- (negate (IFN_FNMA@3 @0 @1 @2))
- (if (single_use (@3))
- (IFN_FMS @0 @1 @2)))
-
-(simplify
- (IFN_FNMS:c (negate @0) @1 @2)
- (IFN_FMS @0 @1 @2))
-(simplify
- (IFN_FNMS @0 @1 (negate @2))
- (IFN_FNMA @0 @1 @2))
-(simplify
- (IFN_FNMS:c (negate @0) @1 (negate @2))
- (IFN_FMA @0 @1 @2))
-(simplify
- (negate (IFN_FNMS@3 @0 @1 @2))
- (if (single_use (@3))
- (IFN_FMA @0 @1 @2)))
+ (IFN_FMA @0 @1 @2))))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ee97792..a04a327 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2018-05-24 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * gcc.dg/vect/vect-fma-1.c: New test.
+
2018-05-24 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.dg/tree-prof/update-loopch.c: Fix dumpfile name in
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c
new file mode 100644
index 0000000..6b6b4f7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target scalar_all_fma } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define DEF(INV) \
+ void __attribute__ ((noipa)) \
+ f_##INV (double *restrict a, double *restrict b, \
+ double *restrict c, double *restrict d) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ double fma = __builtin_fma (mb, mc, md); \
+ a[i] = (INV & 4 ? -fma : fma); \
+ } \
+ }
+
+#define TEST(INV) \
+ { \
+ f_##INV (a, b, c, d); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ double fma = __builtin_fma (mb, mc, md); \
+ double expected = (INV & 4 ? -fma : fma); \
+ if (a[i] != expected) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define FOR_EACH_INV(T) \
+ T (0) T (1) T (2) T (3) T (4) T (5) T (6) T (7)
+
+FOR_EACH_INV (DEF)
+
+int
+main (void)
+{
+ double a[N], b[N], c[N], d[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = i % 17;
+ c[i] = i % 9 + 11;
+ d[i] = i % 13 + 14;
+ asm volatile ("" ::: "memory");
+ }
+ FOR_EACH_INV (TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 8 "vect" { target vect_double } } } */