author    Tamar Christina <tamar.christina@arm.com>    2023-08-04 13:46:36 +0100
committer Tamar Christina <tamar.christina@arm.com>    2023-08-04 13:46:36 +0100
commit    0e5205912994fbc43719b43282a62bb35957f8a2 (patch)
tree      4890be9b537a5b0004eaba2385efb3f6bdbfa626
parent    1a599caab86464006ea8c9501aff6c6638e891eb (diff)
AArch64: update costing for MLA by invariant
When determining issue rates we currently discount non-constant MLA
accumulators for Advanced SIMD but don't do it for the latency.

This means the costs for Advanced SIMD with a constant accumulator are wrong,
and this results in us costing SVE and Advanced SIMD the same.  This can cause
us to vectorize with Advanced SIMD instead of SVE in some cases.

This patch adds the same discount for SVE and Scalar as we do for issue rate.

This gives a 5% improvement in fotonik3d_r in SPECCPU 2017 on large Neoverse
cores.

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_multiply_add_p): Update handling
	of constants.
	(aarch64_adjust_stmt_cost): Use it.
	(aarch64_vector_costs::count_ops): Likewise.
	(aarch64_vector_costs::add_stmt_cost): Pass vinfo to
	aarch64_adjust_stmt_cost.
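For context, a minimal sketch of the kind of loop this costing change is about:
a multiply-add whose accumulator operand is a loop invariant (or a literal
constant, i.e. a vect_constant_def) rather than the result of another vector
statement.  The function name, types and loop shape below are illustrative
assumptions, not taken from the commit or from fotonik3d_r.

/* Hypothetical example: the addend C is loop-invariant.  Advanced SIMD only
   has accumulating MLA forms, so an invariant accumulator needs an extra move
   per iteration, whereas SVE and scalar multiply-add can tie the result to
   any input; per the commit message, the patch gives SVE and scalar the same
   latency discount that the issue-rate code already applied.  */
void
mla_by_invariant (float *restrict out, const float *restrict a,
                  const float *restrict b, float c, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] * b[i] + c;
}

Compiled at -O3 for a Neoverse-class target, this is the sort of loop where the
SVE versus Advanced SIMD cost comparison described above comes into play.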
-rw-r--r--    gcc/config/aarch64/aarch64.cc    24
1 file changed, 15 insertions, 9 deletions
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5b8d8fa..53fbecb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16410,10 +16410,6 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
   if (code != PLUS_EXPR && code != MINUS_EXPR)
     return false;
 
-  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (assign))
-      || CONSTANT_CLASS_P (gimple_assign_rhs2 (assign)))
-    return false;
-
   for (int i = 1; i < 3; ++i)
     {
       tree rhs = gimple_op (assign, i);
@@ -16441,7 +16437,8 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
             return false;
           def_stmt_info = vinfo->lookup_def (rhs);
           if (!def_stmt_info
-              || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def)
+              || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def
+              || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_constant_def)
             return false;
         }
 
@@ -16721,8 +16718,9 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
    and which when vectorized would operate on vector type VECTYPE.  Add the
    cost of any embedded operations.  */
 static fractional_cost
-aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-                          tree vectype, fractional_cost stmt_cost)
+aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
+                          stmt_vec_info stmt_info, tree vectype,
+                          unsigned vec_flags, fractional_cost stmt_cost)
 {
   if (vectype)
     {
@@ -16745,6 +16743,14 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
           break;
         }
 
+      gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
+      if (assign && !vect_is_reduction (stmt_info))
+        {
+          /* For MLA we need to reduce the cost since MLA is 1 instruction.  */
+          if (aarch64_multiply_add_p (vinfo, stmt_info, vec_flags))
+            return 0;
+        }
+
       if (kind == vector_stmt || kind == vec_to_scalar)
         if (tree cmp_type = vect_embedded_comparison_type (stmt_info))
           {
@@ -17060,8 +17066,8 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
     {
       /* Account for any extra "embedded" costs that apply additively
          to the base cost calculated above.  */
-      stmt_cost = aarch64_adjust_stmt_cost (kind, stmt_info, vectype,
-                                            stmt_cost);
+      stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info,
+                                            vectype, m_vec_flags, stmt_cost);
 
       /* If we're recording a nonzero vector loop body cost for the
          innermost loop, also estimate the operations that would need