[AArch64] Improve register allocation of fma

This patch improves register allocation of fma by preferring to update the accumulator register. This is done by adding fma insns with operand 1 as the accumulator. The register allocator considers copy preferences only in operand order, so if the first operand is dead, it has the highest chance of being reused as the destination. As a result, code using fma often has a better register allocation. Performance of SPECFP2017 improves by over 0.5% on some implementations, while it has no effect on other implementations.

Code using fma is more readable too; in a simple example we now generate:

	fmadd	s16, s2, s1, s16
	fmadd	s7, s17, s16, s7
	fmadd	s6, s16, s7, s6
	fmadd	s5, s7, s6, s5

instead of:

	fmadd	s16, s16, s2, s1
	fmadd	s7, s7, s16, s6
	fmadd	s6, s6, s7, s5
	fmadd	s5, s5, s6, s4

gcc/
	* config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern.
	(fnma<mode>4): Likewise.
	(fms<mode>4): Likewise.
	(fnms<mode>4): Likewise.
	(aarch64_fma<mode>4): Rename insn, reorder accumulator operand.
	(aarch64_fnma<mode>4): Likewise.
	(aarch64_fms<mode>4): Likewise.
	(aarch64_fnms<mode>4): Likewise.
	(aarch64_fnmadd<mode>4): Likewise.

From-SVN: r260292
author: Wilco Dijkstra <wdijkstr@arm.com> 2018-05-16 14:33:16 +0000
committer: Wilco Dijkstra <wilco@gcc.gnu.org> 2018-05-16 14:33:16 +0000
commit: d6e6e8b677a0dfec33b6adee2a7916c42cc38934 (patch)
tree: bb8538b7e4e61a8407039d886ceb97ef55c88f26 /gcc
parent: df0fc585b70b65a188de010f50cb90b0db3b9045 (diff)
download: gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.zip
gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.tar.gz
gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.tar.bz2
2 files changed, 75 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b90e1da..6390b42 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2018-05-16  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* config/aarch64/aarch64.md (fma<mode>4): Change into expand pattern.
+	(fnma<mode>4): Likewise.
+	(fms<mode>4): Likewise.
+	(fnms<mode>4): Likewise.
+	(aarch64_fma<mode>4): Rename insn, reorder accumulator operand.
+	(aarch64_fnma<mode>4): Likewise.
+	(aarch64_fms<mode>4): Likewise.
+	(aarch64_fnms<mode>4): Likewise.
+	(aarch64_fnmadd<mode>4): Likewise.
+
 2018-05-16  Jason Merrill  <jason@redhat.com>
 
 	* tree.c (warn_deprecated_use): Return bool.  Simplify logic.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 68ea718..6556303 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4973,57 +4973,94 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
-;; fma - no throw
+;; fma - expand fma into patterns with the accumulator operand first since
+;; reusing the accumulator results in better register allocation.
+;; The register allocator considers copy preferences in operand order,
+;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0.
+
+(define_expand "fma<mode>4"
+  [(set (match_operand:GPF_F16 0 "register_operand")
+	(fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand")
+		     (match_operand:GPF_F16 2 "register_operand")
+		     (match_operand:GPF_F16 3 "register_operand")))]
+  "TARGET_FLOAT"
+)
 
-(define_insn "fma<mode>4"
+(define_insn "*aarch64_fma<mode>4"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
-        (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")
-		     (match_operand:GPF_F16 2 "register_operand" "w")
-		     (match_operand:GPF_F16 3 "register_operand" "w")))]
+	(fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")
+		     (match_operand:GPF_F16 3 "register_operand" "w")
+		     (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
-  "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<stype>")]
 )
 
-(define_insn "fnma<mode>4"
+(define_expand "fnma<mode>4"
+  [(set (match_operand:GPF_F16 0 "register_operand")
+	(fma:GPF_F16
+	  (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand"))
+	  (match_operand:GPF_F16 2 "register_operand")
+	  (match_operand:GPF_F16 3 "register_operand")))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fnma<mode>4"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
 	(fma:GPF_F16
-	  (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w"))
-	  (match_operand:GPF_F16 2 "register_operand" "w")
-	  (match_operand:GPF_F16 3 "register_operand" "w")))]
+	  (neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w"))
+	  (match_operand:GPF_F16 3 "register_operand" "w")
+	  (match_operand:GPF_F16 1 "register_operand" "w")))]
   "TARGET_FLOAT"
-  "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<stype>")]
 )
 
-(define_insn "fms<mode>4"
+
+(define_expand "fms<mode>4"
+  [(set (match_operand:GPF 0 "register_operand")
+	(fma:GPF (match_operand:GPF 1 "register_operand")
+		 (match_operand:GPF 2 "register_operand")
+		 (neg:GPF (match_operand:GPF 3 "register_operand"))))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fms<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-        (fma:GPF (match_operand:GPF 1 "register_operand" "w")
-		 (match_operand:GPF 2 "register_operand" "w")
-		 (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
+	(fma:GPF (match_operand:GPF 2 "register_operand" "w")
+		 (match_operand:GPF 3 "register_operand" "w")
+		 (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
   "TARGET_FLOAT"
-  "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmsub\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
 
-(define_insn "fnms<mode>4"
+(define_expand "fnms<mode>4"
+  [(set (match_operand:GPF 0 "register_operand")
+	(fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand"))
+		 (match_operand:GPF 2 "register_operand")
+		 (neg:GPF (match_operand:GPF 3 "register_operand"))))]
+  "TARGET_FLOAT"
+)
+
+(define_insn "*aarch64_fnms<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-	(fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w"))
-		 (match_operand:GPF 2 "register_operand" "w")
-		 (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))]
+	(fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w"))
+		 (match_operand:GPF 3 "register_operand" "w")
+		 (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))]
   "TARGET_FLOAT"
-  "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
 
 ;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
-(define_insn "*fnmadd<mode>4"
+(define_insn "*aarch64_fnmadd<mode>4"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-	(neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w")
-			  (match_operand:GPF 2 "register_operand" "w")
-			  (match_operand:GPF 3 "register_operand" "w"))))]
+	(neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w")
+			  (match_operand:GPF 3 "register_operand" "w")
+			  (match_operand:GPF 1 "register_operand" "w"))))]
   "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT"
-  "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3"
+  "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1"
   [(set_attr "type" "fmac<s>")]
 )
author	Wilco Dijkstra <wdijkstr@arm.com>	2018-05-16 14:33:16 +0000
committer	Wilco Dijkstra <wilco@gcc.gnu.org>	2018-05-16 14:33:16 +0000
commit	d6e6e8b677a0dfec33b6adee2a7916c42cc38934 (patch)
tree	bb8538b7e4e61a8407039d886ceb97ef55c88f26 /gcc
parent	df0fc585b70b65a188de010f50cb90b0db3b9045 (diff)
download	gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.zip gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.tar.gz gcc-d6e6e8b677a0dfec33b6adee2a7916c42cc38934.tar.bz2