diff options
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-05-14 10:05:42 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-05-14 15:31:25 +0100
commit: ff3809b459db881e80f627e81ec946e7bbd7041d (patch)
tree: b02afb0aa23c6027c346c5cd5fcc94b1bb3e0889 /gcc
parent: 4206171605de65df9674a14dd9db75bf4f4ed037 (diff)
download: gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.zip gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.tar.gz gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.tar.bz2
aarch64: Make sqdmlal2 patterns match canonical RTL
The sqdmlal2 patterns are hidden beneath the SBINQOPS iterator and unfortunately they don't match
canonical RTL because the simple accumulate operand comes in the first arm of the SS_PLUS.
This patch splits the SS_PLUS and SS_MINUS forms with the SS_PLUS operands set up to match
the canonical form, where the complex operand comes first.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Split into...
(aarch64_sqdmlsl2_lane<mode>_internal): ... This...
(aarch64_sqdmlal2_lane<mode>_internal): ... And this.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Split into ...
(aarch64_sqdmlsl2_laneq<mode>_internal): ... This...
(aarch64_sqdmlal2_laneq<mode>_internal): ... And this.
(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Split into...
(aarch64_sqdmlsl2_n<mode>_internal): ... This...
(aarch64_sqdmlal2_n<mode>_internal): ... And this.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 89 |
1 files changed, 80 insertions, 9 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 802cca3..e59bc7b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5374,9 +5374,9 @@ ;; vqdml[sa]l2_lane -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" +(define_insn "aarch64_sqdmlsl2_lane<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5395,14 +5395,40 @@ { operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); + return + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" +(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5421,7 +5447,33 @@ { operands[4] = 
aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); + return + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) @@ -5460,9 +5512,9 @@ DONE; }) -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" +(define_insn "aarch64_sqdmlsl2_n<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5475,7 +5527,26 @@ (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_n<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + 
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) |