aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-ssa-math-opts.c
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-07-12 13:01:48 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-07-12 13:01:48 +0000
commit: 0936858f081b77319f8f6e5825dc86d2861d0445 (patch)
tree: 87c7aa8363d38fe0e4022d33f8a25f14b6dc8ff8 /gcc/tree-ssa-math-opts.c
parent: b41d1f6ed753bf7ae7e68f745e50c26ee65b5711 (diff)
download: gcc-0936858f081b77319f8f6e5825dc86d2861d0445.zip
gcc-0936858f081b77319f8f6e5825dc86d2861d0445.tar.gz
gcc-0936858f081b77319f8f6e5825dc86d2861d0445.tar.bz2
Support fused multiply-adds in fully-masked reductions
This patch adds support for fusing a conditional add or subtract
with a multiplication, so that we can use fused multiply-add and
multiply-subtract operations for fully-masked reductions.  E.g.
for SVE we vectorise:

  double res = 0.0;
  for (int i = 0; i < n; ++i)
    res += a[i] * b[i];

using a fully-masked loop in which the loop body has the form:

  res_1 = PHI<0(preheader), res_2(latch)>;
  avec = .MASK_LOAD (loop_mask, a)
  bvec = .MASK_LOAD (loop_mask, b)
  prod = avec * bvec;
  res_2 = .COND_ADD (loop_mask, res_1, prod, res_1);

where the last statement does the equivalent of:

  res_2 = loop_mask ? res_1 + prod : res_1;

(operating elementwise).  The point of the patch is to convert the last
two statements into:

  res_2 = .COND_FMA (loop_mask, avec, bvec, res_1, res_1);

which is equivalent to:

  res_2 = loop_mask ? fma (avec, bvec, res_1) : res_1;

(again operating elementwise).

2018-07-12  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
  * internal-fn.h (can_interpret_as_conditional_op_p): Declare.
  * internal-fn.c (can_interpret_as_conditional_op_p): New function.
  * tree-ssa-math-opts.c (convert_mult_to_fma_1): Handle conditional
  plus and minus and convert them into IFN_COND_FMA-based sequences.
  (convert_mult_to_fma): Handle conditional plus and minus.

gcc/testsuite/
  * gcc.dg/vect/vect-fma-2.c: New test.
  * gcc.target/aarch64/sve/reduc_4.c: Likewise.
  * gcc.target/aarch64/sve/reduc_6.c: Likewise.
  * gcc.target/aarch64/sve/reduc_7.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r262588
Diffstat (limited to 'gcc/tree-ssa-math-opts.c')
-rw-r--r--  gcc/tree-ssa-math-opts.c  118
1 files changed, 62 insertions, 56 deletions
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 187ca5a..e32669d 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2655,7 +2655,6 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
{
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
- enum tree_code use_code;
tree addop, mulop1 = op1, result = mul_result;
bool negate_p = false;
gimple_seq seq = NULL;
@@ -2663,8 +2662,8 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
if (is_gimple_debug (use_stmt))
continue;
- use_code = gimple_assign_rhs_code (use_stmt);
- if (use_code == NEGATE_EXPR)
+ if (is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
{
result = gimple_assign_lhs (use_stmt);
use_operand_p use_p;
@@ -2675,22 +2674,23 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
use_stmt = neguse_stmt;
gsi = gsi_for_stmt (use_stmt);
- use_code = gimple_assign_rhs_code (use_stmt);
negate_p = true;
}
- if (gimple_assign_rhs1 (use_stmt) == result)
+ tree cond, else_value, ops[3];
+ tree_code code;
+ if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
+ ops, &else_value))
+ gcc_unreachable ();
+ addop = ops[0] == result ? ops[1] : ops[0];
+
+ if (code == MINUS_EXPR)
{
- addop = gimple_assign_rhs2 (use_stmt);
- /* a * b - c -> a * b + (-c) */
- if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ if (ops[0] == result)
+ /* a * b - c -> a * b + (-c) */
addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
- }
- else
- {
- addop = gimple_assign_rhs1 (use_stmt);
- /* a - b * c -> (-b) * c + a */
- if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
+ else
+ /* a - b * c -> (-b) * c + a */
negate_p = !negate_p;
}
@@ -2699,8 +2699,13 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
if (seq)
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
- fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
- gimple_call_set_lhs (fma_stmt, gimple_assign_lhs (use_stmt));
+
+ if (cond)
+ fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
+ op2, addop, else_value);
+ else
+ fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
+ gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (use_stmt));
gsi_replace (&gsi, fma_stmt, true);
/* Follow all SSA edges so that we generate FMS, FNMA and FNMS
@@ -2883,7 +2888,6 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
as an addition. */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
{
- enum tree_code use_code;
tree result = mul_result;
bool negate_p = false;
@@ -2904,13 +2908,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
return false;
- if (!is_gimple_assign (use_stmt))
- return false;
-
- use_code = gimple_assign_rhs_code (use_stmt);
-
/* A negate on the multiplication leads to FNMA. */
- if (use_code == NEGATE_EXPR)
+ if (is_gimple_assign (use_stmt)
+ && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
{
ssa_op_iter iter;
use_operand_p usep;
@@ -2932,17 +2932,20 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
use_stmt = neguse_stmt;
if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
return false;
- if (!is_gimple_assign (use_stmt))
- return false;
- use_code = gimple_assign_rhs_code (use_stmt);
negate_p = true;
}
- switch (use_code)
+ tree cond, else_value, ops[3];
+ tree_code code;
+ if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
+ &else_value))
+ return false;
+
+ switch (code)
{
case MINUS_EXPR:
- if (gimple_assign_rhs2 (use_stmt) == result)
+ if (ops[1] == result)
negate_p = !negate_p;
break;
case PLUS_EXPR:
@@ -2952,47 +2955,50 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
return false;
}
- /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
- by a MULT_EXPR that we'll visit later, we might be able to
- get a more profitable match with fnma.
+ if (cond)
+ {
+ if (cond == result || else_value == result)
+ return false;
+ if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
+ return false;
+ }
+
+ /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
+ we'll visit later, we might be able to get a more profitable
+ match with fnma.
OTOH, if we don't, a negate / fma pair has likely lower latency
that a mult / subtract pair. */
- if (use_code == MINUS_EXPR && !negate_p
- && gimple_assign_rhs1 (use_stmt) == result
+ if (code == MINUS_EXPR
+ && !negate_p
+ && ops[0] == result
&& !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
- && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type))
+ && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
+ && TREE_CODE (ops[1]) == SSA_NAME
+ && has_single_use (ops[1]))
{
- tree rhs2 = gimple_assign_rhs2 (use_stmt);
-
- if (TREE_CODE (rhs2) == SSA_NAME)
- {
- gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
- if (has_single_use (rhs2)
- && is_gimple_assign (stmt2)
- && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
- return false;
- }
+ gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
+ if (is_gimple_assign (stmt2)
+ && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
+ return false;
}
- tree use_rhs1 = gimple_assign_rhs1 (use_stmt);
- tree use_rhs2 = gimple_assign_rhs2 (use_stmt);
/* We can't handle a * b + a * b. */
- if (use_rhs1 == use_rhs2)
+ if (ops[0] == ops[1])
return false;
/* If deferring, make sure we are not looking at an instruction that
wouldn't have existed if we were not. */
if (state->m_deferring_p
- && (state->m_mul_result_set.contains (use_rhs1)
- || state->m_mul_result_set.contains (use_rhs2)))
+ && (state->m_mul_result_set.contains (ops[0])
+ || state->m_mul_result_set.contains (ops[1])))
return false;
if (check_defer)
{
- tree use_lhs = gimple_assign_lhs (use_stmt);
+ tree use_lhs = gimple_get_lhs (use_stmt);
if (state->m_last_result)
{
- if (use_rhs2 == state->m_last_result
- || use_rhs1 == state->m_last_result)
+ if (ops[1] == state->m_last_result
+ || ops[0] == state->m_last_result)
defer = true;
else
defer = false;
@@ -3001,12 +3007,12 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
{
gcc_checking_assert (!state->m_initial_phi);
gphi *phi;
- if (use_rhs1 == result)
- phi = result_of_phi (use_rhs2);
+ if (ops[0] == result)
+ phi = result_of_phi (ops[1]);
else
{
- gcc_assert (use_rhs2 == result);
- phi = result_of_phi (use_rhs1);
+ gcc_assert (ops[1] == result);
+ phi = result_of_phi (ops[0]);
}
if (phi)