diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c | 60 | ||||
-rw-r--r-- | gcc/tree-ssa-math-opts.c | 71 |
4 files changed, 115 insertions, 28 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1f71491..2ad6a66 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-07-30 Richard Sandiford <richard.sandiford@arm.com> + + * tree-ssa-math-opts.c (convert_mult_to_fma): Add a mul_cond + parameter. When nonnull, make sure that the addition or subtraction + has the same condition. + (math_opts_dom_walker::after_dom_children): Try convert_mult_to_fma + for CFN_COND_MUL too. + 2019-07-30 Richard Biener <rguenther@suse.de> PR tree-optimization/91291 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6c4292a..5a85698 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2019-07-30 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.dg/vect/vect-cond-arith-7.c: New test. + 2019-07-30 Jakub Jelinek <jakub@redhat.com> PR middle-end/91282 diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c new file mode 100644 index 0000000..739b98f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c @@ -0,0 +1,60 @@ +/* { dg-require-effective-target scalar_all_fma } */ +/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */ + +#include "tree-vect.h" + +#define N (VECTOR_BITS * 11 / 64 + 3) + +#define DEF(INV) \ + void __attribute__ ((noipa)) \ + f_##INV (double *restrict a, double *restrict b, \ + double *restrict c, double *restrict d) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + a[i] = b[i] < 10 ? mb * mc + md : 10.0; \ + } \ + } + +#define TEST(INV) \ + { \ + f_##INV (a, b, c, d); \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + double fma = __builtin_fma (mb, mc, md); \ + if (a[i] != (i % 17 < 10 ? fma : 10.0)) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +#define FOR_EACH_INV(T) \ + T (0) T (1) T (2) T (3) + +FOR_EACH_INV (DEF) + +int +main (void) +{ + double a[N], b[N], c[N], d[N]; + for (int i = 0; i < N; ++i) + { + b[i] = i % 17; + c[i] = i % 9 + 11; + d[i] = i % 13 + 14; + asm volatile ("" ::: "memory"); + } + FOR_EACH_INV (TEST) + return 0; +} + +/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */ +/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */ diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index b7bbde4..3e0b238 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3044,6 +3044,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 with uses in additions and subtractions to form fused multiply-add operations. Returns true if successful and MUL_STMT should be removed. + If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional + on MUL_COND, otherwise it is unconditional. If STATE indicates that we are deferring FMA transformation, that means that we do not produce FMAs for basic blocks which look like: @@ -3060,7 +3062,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, static bool convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, - fma_deferring_state *state) + fma_deferring_state *state, tree mul_cond = NULL_TREE) { tree mul_result = gimple_get_lhs (mul_stmt); tree type = TREE_TYPE (mul_result); @@ -3174,6 +3176,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, return false; } + if (mul_cond && cond != mul_cond) + return false; + if (cond) { if (cond == result || else_value == result) @@ -3785,38 +3790,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb) } else if (is_gimple_call (stmt)) { - tree fndecl = gimple_call_fndecl (stmt); - if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) + switch (gimple_call_combined_fn (stmt)) { - switch (DECL_FUNCTION_CODE (fndecl)) + CASE_CFN_POW: + if (gimple_call_lhs (stmt) + && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST + && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), + &dconst2) + && convert_mult_to_fma (stmt, + gimple_call_arg (stmt, 0), + gimple_call_arg (stmt, 0), + &fma_state)) { - case BUILT_IN_POWF: - case BUILT_IN_POW: - case BUILT_IN_POWL: - if (gimple_call_lhs (stmt) - && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST - && real_equal - (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), - &dconst2) - && convert_mult_to_fma (stmt, - gimple_call_arg (stmt, 0), - gimple_call_arg (stmt, 0), - &fma_state)) - { - unlink_stmt_vdef (stmt); - if (gsi_remove (&gsi, true) - && gimple_purge_dead_eh_edges (bb)) - *m_cfg_changed_p = true; - release_defs (stmt); - continue; - } - break; + unlink_stmt_vdef (stmt); + if (gsi_remove (&gsi, true) + && gimple_purge_dead_eh_edges (bb)) + *m_cfg_changed_p = true; + release_defs (stmt); + continue; + } + break; - default:; + case CFN_COND_MUL: + if (convert_mult_to_fma (stmt, + gimple_call_arg (stmt, 1), + gimple_call_arg (stmt, 2), + &fma_state, + gimple_call_arg (stmt, 0))) + + { + gsi_remove (&gsi, true); + release_defs (stmt); + continue; } + break; + + case CFN_LAST: + cancel_fma_deferring (&fma_state); + break; + + default: + break; } - else - cancel_fma_deferring (&fma_state); } gsi_next (&gsi); } |