diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2021-08-03 13:00:47 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-08-03 13:00:47 +0100 |
commit | 028059b46ec9aef7dd447792c579f35396751068 (patch) | |
tree | c18014f9620fb867bf2dd9badde458ab466e5c2d | |
parent | 537afb0857c8f60c2b60a09fad4660420cd13e8f (diff) | |
download | gcc-028059b46ec9aef7dd447792c579f35396751068.zip gcc-028059b46ec9aef7dd447792c579f35396751068.tar.gz gcc-028059b46ec9aef7dd447792c579f35396751068.tar.bz2 |
aarch64: Tweak MLA vector costs
The issue-based vector costs currently assume that a multiply-add
sequence can be implemented using a single instruction. This is
generally true for scalars (which have a 4-operand instruction)
and SVE (which allows the output to be tied to any input).
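However, Advanced SIMD only provides MLA forms, in which the result
is tied to the accumulator input. Multiplying two values and adding
an invariant will therefore end up being a move (to copy the invariant
into the result register) and an MLA.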
The only target to use the issue-based vector costs is Neoverse V1,
which would generally prefer SVE in this case anyway. I therefore
don't have a self-contained testcase. However, the distinction
becomes more important with a later patch.
gcc/
* config/aarch64/aarch64.c (aarch64_multiply_add_p): Add a vec_flags
parameter. Detect cases in which an Advanced SIMD MLA would almost
certainly require a MOV.
(aarch64_count_ops): Update accordingly.
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 25 |
1 file changed, 22 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 084f8ca..19045ef 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14767,9 +14767,12 @@ aarch64_integer_truncation_p (stmt_vec_info stmt_info) /* Return true if STMT_INFO is the second part of a two-statement multiply-add or multiply-subtract sequence that might be suitable for fusing into a - single instruction. */ + single instruction. If VEC_FLAGS is zero, analyze the operation as + a scalar one, otherwise analyze it as an operation on vectors with those + VEC_* flags. */ static bool -aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) +aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info, + unsigned int vec_flags) { gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); if (!assign) @@ -14797,6 +14800,22 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) continue; + if (vec_flags & VEC_ADVSIMD) + { + /* Scalar and SVE code can tie the result to any FMLA input (or none, + although that requires a MOVPRFX for SVE). However, Advanced SIMD + only supports MLA forms, so will require a move if the result + cannot be tied to the accumulator. The most important case in + which this is true is when the accumulator input is invariant. */ + rhs = gimple_op (assign, 3 - i); + if (TREE_CODE (rhs) != SSA_NAME) + return false; + def_stmt_info = vinfo->lookup_def (rhs); + if (!def_stmt_info + || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def) + return false; + } + return true; } return false; @@ -15232,7 +15251,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, } /* Assume that multiply-adds will become a single operation. 
*/ - if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info)) + if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info, vec_flags)) return; /* When costing scalar statements in vector code, the count already |