aarch64: Tweak MLA vector costs

The issue-based vector costs currently assume that a multiply-add
sequence can be implemented using a single instruction.  This is
generally true for scalars (which have a 4-operand instruction)
and SVE (which allows the output to be tied to any input).
However, for Advanced SIMD, multiplying two values and adding
an invariant will end up being a move and an MLA.

The only target to use the issue-based vector costs is Neoverse V1,
which would generally prefer SVE in this case anyway.  I therefore
don't have a self-contained testcase.  However, the distinction
becomes more important with a later patch.

gcc/
	* config/aarch64/aarch64.c (aarch64_multiply_add_p): Add a vec_flags
	parameter.  Detect cases in which an Advanced SIMD MLA would almost
	certainly require a MOV.
	(aarch64_count_ops): Update accordingly.
author: Richard Sandiford <richard.sandiford@arm.com> 2021-08-03 13:00:47 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2021-08-03 13:00:47 +0100
commit: 028059b46ec9aef7dd447792c579f35396751068 (patch)
tree: c18014f9620fb867bf2dd9badde458ab466e5c2d
parent: 537afb0857c8f60c2b60a09fad4660420cd13e8f (diff)
download: gcc-028059b46ec9aef7dd447792c579f35396751068.zip
gcc-028059b46ec9aef7dd447792c579f35396751068.tar.gz
gcc-028059b46ec9aef7dd447792c579f35396751068.tar.bz2
1 files changed, 22 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 084f8ca..19045ef 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14767,9 +14767,12 @@ aarch64_integer_truncation_p (stmt_vec_info stmt_info)
 
 /* Return true if STMT_INFO is the second part of a two-statement multiply-add
    or multiply-subtract sequence that might be suitable for fusing into a
-   single instruction.  */
+   single instruction.  If VEC_FLAGS is zero, analyze the operation as
+   a scalar one, otherwise analyze it as an operation on vectors with those
+   VEC_* flags.  */
 static bool
-aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
+aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
+			unsigned int vec_flags)
 {
   gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
   if (!assign)
@@ -14797,6 +14800,22 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
       if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR)
 	continue;
 
+      if (vec_flags & VEC_ADVSIMD)
+	{
+	  /* Scalar and SVE code can tie the result to any FMLA input (or none,
+	     although that requires a MOVPRFX for SVE).  However, Advanced SIMD
+	     only supports MLA forms, so will require a move if the result
+	     cannot be tied to the accumulator.  The most important case in
+	     which this is true is when the accumulator input is invariant.  */
+	  rhs = gimple_op (assign, 3 - i);
+	  if (TREE_CODE (rhs) != SSA_NAME)
+	    return false;
+	  def_stmt_info = vinfo->lookup_def (rhs);
+	  if (!def_stmt_info
+	      || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def)
+	    return false;
+	}
+
       return true;
     }
   return false;
@@ -15232,7 +15251,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs,
     }
 
   /* Assume that multiply-adds will become a single operation.  */
-  if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info))
+  if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info, vec_flags))
     return;
 
   /* When costing scalar statements in vector code, the count already
author	Richard Sandiford <richard.sandiford@arm.com>	2021-08-03 13:00:47 +0100
committer	Richard Sandiford <richard.sandiford@arm.com>	2021-08-03 13:00:47 +0100
commit	028059b46ec9aef7dd447792c579f35396751068 (patch)
tree	c18014f9620fb867bf2dd9badde458ab466e5c2d
parent	537afb0857c8f60c2b60a09fad4660420cd13e8f (diff)
download	gcc-028059b46ec9aef7dd447792c579f35396751068.zip gcc-028059b46ec9aef7dd447792c579f35396751068.tar.gz gcc-028059b46ec9aef7dd447792c579f35396751068.tar.bz2