aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2019-07-30 10:59:29 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2019-07-30 10:59:29 +0000
commit c1b3d827832f883e0634b18c88eb2bbde335aa42 (patch)
tree 74af98cf646a4ffe37dee618dc42f2a01020ce7b /gcc
parent 8c955a4b9ca99dc1557dd4547068abf7a0838c8f (diff)
download gcc-c1b3d827832f883e0634b18c88eb2bbde335aa42.zip
gcc-c1b3d827832f883e0634b18c88eb2bbde335aa42.tar.gz
gcc-c1b3d827832f883e0634b18c88eb2bbde335aa42.tar.bz2
Handle IFN_COND_MUL in tree-ssa-math-opts.c
This patch extends the FMA handling in tree-ssa-math-opts.c so that it
can cope with conditional multiplications as well as unconditional
multiplications.  The addition or subtraction must then have the same
condition as the multiplication (at least for now).

E.g. we can currently fold:

  (IFN_COND_ADD cond (mul x y) z fallback)
    -> (IFN_COND_FMA cond x y z fallback)

This patch also allows:

  (IFN_COND_ADD cond (IFN_COND_MUL cond x y <whatever>) z fallback)
    -> (IFN_COND_FMA cond x y z fallback)

2019-07-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-ssa-math-opts.c (convert_mult_to_fma): Add a mul_cond
	parameter.  When nonnull, make sure that the addition or subtraction
	has the same condition.
	(math_opts_dom_walker::after_dom_children): Try convert_mult_to_fma
	for CFN_COND_MUL too.

gcc/testsuite/
	* gcc.dg/vect/vect-cond-arith-7.c: New test.

From-SVN: r273905
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c 60
-rw-r--r-- gcc/tree-ssa-math-opts.c 71
4 files changed, 115 insertions, 28 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1f71491..2ad6a66 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-07-30 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-ssa-math-opts.c (convert_mult_to_fma): Add a mul_cond
+ parameter. When nonnull, make sure that the addition or subtraction
+ has the same condition.
+ (math_opts_dom_walker::after_dom_children): Try convert_mult_to_fma
+ for CFN_COND_MUL too.
+
2019-07-30 Richard Biener <rguenther@suse.de>
PR tree-optimization/91291
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6c4292a..5a85698 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-07-30 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/vect/vect-cond-arith-7.c: New test.
+
2019-07-30 Jakub Jelinek <jakub@redhat.com>
PR middle-end/91282
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c
new file mode 100644
index 0000000..739b98f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target scalar_all_fma } */
+/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define DEF(INV) \
+ void __attribute__ ((noipa)) \
+ f_##INV (double *restrict a, double *restrict b, \
+ double *restrict c, double *restrict d) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ a[i] = b[i] < 10 ? mb * mc + md : 10.0; \
+ } \
+ }
+
+#define TEST(INV) \
+ { \
+ f_##INV (a, b, c, d); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ double fma = __builtin_fma (mb, mc, md); \
+ if (a[i] != (i % 17 < 10 ? fma : 10.0)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define FOR_EACH_INV(T) \
+ T (0) T (1) T (2) T (3)
+
+FOR_EACH_INV (DEF)
+
+int
+main (void)
+{
+ double a[N], b[N], c[N], d[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = i % 17;
+ c[i] = i % 9 + 11;
+ d[i] = i % 13 + 14;
+ asm volatile ("" ::: "memory");
+ }
+ FOR_EACH_INV (TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index b7bbde4..3e0b238 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3044,6 +3044,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed.
+ If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
+ on MUL_COND, otherwise it is unconditional.
If STATE indicates that we are deferring FMA transformation, that means
that we do not produce FMAs for basic blocks which look like:
@@ -3060,7 +3062,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
- fma_deferring_state *state)
+ fma_deferring_state *state, tree mul_cond = NULL_TREE)
{
tree mul_result = gimple_get_lhs (mul_stmt);
tree type = TREE_TYPE (mul_result);
@@ -3174,6 +3176,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
return false;
}
+ if (mul_cond && cond != mul_cond)
+ return false;
+
if (cond)
{
if (cond == result || else_value == result)
@@ -3785,38 +3790,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
}
else if (is_gimple_call (stmt))
{
- tree fndecl = gimple_call_fndecl (stmt);
- if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+ switch (gimple_call_combined_fn (stmt))
{
- switch (DECL_FUNCTION_CODE (fndecl))
+ CASE_CFN_POW:
+ if (gimple_call_lhs (stmt)
+ && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+ && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+ &dconst2)
+ && convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 0),
+ &fma_state))
{
- case BUILT_IN_POWF:
- case BUILT_IN_POW:
- case BUILT_IN_POWL:
- if (gimple_call_lhs (stmt)
- && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
- && real_equal
- (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
- &dconst2)
- && convert_mult_to_fma (stmt,
- gimple_call_arg (stmt, 0),
- gimple_call_arg (stmt, 0),
- &fma_state))
- {
- unlink_stmt_vdef (stmt);
- if (gsi_remove (&gsi, true)
- && gimple_purge_dead_eh_edges (bb))
- *m_cfg_changed_p = true;
- release_defs (stmt);
- continue;
- }
- break;
+ unlink_stmt_vdef (stmt);
+ if (gsi_remove (&gsi, true)
+ && gimple_purge_dead_eh_edges (bb))
+ *m_cfg_changed_p = true;
+ release_defs (stmt);
+ continue;
+ }
+ break;
- default:;
+ case CFN_COND_MUL:
+ if (convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 1),
+ gimple_call_arg (stmt, 2),
+ &fma_state,
+ gimple_call_arg (stmt, 0)))
+
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
}
+ break;
+
+ case CFN_LAST:
+ cancel_fma_deferring (&fma_state);
+ break;
+
+ default:
+ break;
}
- else
- cancel_fma_deferring (&fma_state);
}
gsi_next (&gsi);
}