diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-30 10:41:02 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-30 10:41:02 +0100 |
commit | 365b1d5493988b6bd40183d1fe49bd8a3b32a6bb (patch) | |
tree | 9f201d77c1ef7c1962eb80da093aa0d983206813 | |
parent | a4dae58abe1a3961aece740b0fada995750c277c (diff) | |
download | gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.zip gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.tar.gz gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.tar.bz2 |
aarch64: Convert ADDLP and ADALP patterns to standard RTL codes
This patch converts the patterns for the integer widen and pairwise-add instructions
to standard RTL operations. The pairwise addition within a vector can be represented
as an addition of two vec_selects, one selecting the even elements, and one selecting odd.
Thus for the intrinsic vpaddlq_s8 we can generate:
(set (reg:V8HI 92)
(plus:V8HI (vec_select:V8HI (sign_extend:V16HI (reg/v:V16QI 93 [ a ]))
(parallel [
(const_int 0 [0])
(const_int 2 [0x2])
(const_int 4 [0x4])
(const_int 6 [0x6])
(const_int 8 [0x8])
(const_int 10 [0xa])
(const_int 12 [0xc])
(const_int 14 [0xe])
]))
(vec_select:V8HI (sign_extend:V16HI (reg/v:V16QI 93 [ a ]))
(parallel [
(const_int 1 [0x1])
(const_int 3 [0x3])
(const_int 5 [0x5])
(const_int 7 [0x7])
(const_int 9 [0x9])
(const_int 11 [0xb])
(const_int 13 [0xd])
(const_int 15 [0xf])
]))))
Similarly for the accumulating forms where there's an extra outer PLUS for the accumulation.
We already have the handy helper functions aarch64_stepped_int_parallel_p and
aarch64_gen_stepped_int_parallel defined in aarch64.cc that we can make use of to define
the right predicate for the VEC_SELECT PARALLEL.
This patch allows us to remove some code iterators and the UNSPEC definitions for SADDLP and UADDLP.
UNSPEC_UADALP and UNSPEC_SADALP are retained because they are still used by SVE2 patterns.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_<sur>adalp<mode>): Delete.
(aarch64_<su>adalp<mode>): New define_expand.
(*aarch64_<su>adalp<mode><vczle><vczbe>_insn): New define_insn.
(aarch64_<su>addlp<mode>): Convert to define_expand.
(*aarch64_<su>addlp<mode><vczle><vczbe>_insn): New define_insn.
* config/aarch64/iterators.md (UNSPEC_SADDLP, UNSPEC_UADDLP): Delete.
(ADALP): Likewise.
(USADDLP): Likewise.
* config/aarch64/predicates.md (vect_par_cnst_even_or_odd_half): Define.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 69 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 15 |
3 files changed, 74 insertions, 19 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c4171ed..2d87ed7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1039,13 +1039,39 @@ } ) -(define_insn "aarch64_<sur>adalp<mode>" +(define_expand "aarch64_<su>adalp<mode>" + [(set (match_operand:<VDBLW> 0 "register_operand") + (plus:<VDBLW> + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 2 "register_operand")) + (match_dup 3)) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2)) + (match_dup 4))) + (match_operand:<VDBLW> 1 "register_operand")))] + "TARGET_SIMD" + { + int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2; + operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2); + operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2); + } +) + +(define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn" [(set (match_operand:<VDBLW> 0 "register_operand" "=w") - (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w") - (match_operand:<VDBLW> 1 "register_operand" "0")] - ADALP))] - "TARGET_SIMD" - "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>" + (plus:<VDBLW> + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 2 "register_operand" "w")) + (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" "")) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2)) + (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" ""))) + (match_operand:<VDBLW> 1 "register_operand" "0")))] + "TARGET_SIMD + && !rtx_equal_p (operands[3], operands[4])" + "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>" [(set_attr "type" "neon_reduc_add<q>")] ) @@ -3699,11 +3725,34 @@ [(set_attr "type" "neon_reduc_add<VDQV_L:q>")] ) -(define_insn "aarch64_<su>addlp<mode>" - [(set (match_operand:<VDBLW> 0 "register_operand" "=w") - (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")] - USADDLP))] +(define_expand "aarch64_<su>addlp<mode>" + [(set (match_operand:<VDBLW> 
0 "register_operand") + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 1 "register_operand")) + (match_dup 2)) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1)) + (match_dup 3))))] "TARGET_SIMD" + { + int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2; + operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2); + operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2); + } +) + +(define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn" + [(set (match_operand:<VDBLW> 0 "register_operand" "=w") + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 1 "register_operand" "w")) + (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half")) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1)) + (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))] + "TARGET_SIMD + && !rtx_equal_p (operands[2], operands[3])" "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>" [(set_attr "type" "neon_reduc_add<q>")] ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index cf49f00..d9c7354 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -667,8 +667,6 @@ UNSPEC_SSHLL ; Used in aarch64-simd.md. UNSPEC_USHLL ; Used in aarch64-simd.md. UNSPEC_ADDP ; Used in aarch64-simd.md. - UNSPEC_SADDLP ; Used in aarch64-simd.md. - UNSPEC_UADDLP ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_TBX ; Used in vector permute patterns. UNSPEC_CONCAT ; Used in vector permute patterns. @@ -2581,9 +2579,6 @@ ;; Int Iterators. ;; ------------------------------------------------------------------- -;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions. 
-(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP]) - (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV UNSPEC_SMAXV UNSPEC_SMINV]) @@ -2594,8 +2589,6 @@ (define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) -(define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP]) - (define_int_iterator USADDLV [UNSPEC_SADDLV UNSPEC_UADDLV]) (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) @@ -3339,8 +3332,6 @@ ;; "s" for signed operations and "u" for unsigned ones. (define_int_attr su [(UNSPEC_SADDV "s") (UNSPEC_UADDV "u") - (UNSPEC_SADDLP "s") - (UNSPEC_UADDLP "u") (UNSPEC_SADDLV "s") (UNSPEC_UADDLV "u") (UNSPEC_UNPACKSHI "s") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 0ce3741..3cbc735 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -460,6 +460,21 @@ return aarch64_simd_check_vect_par_cnst_half (op, mode, false); }) +;; PARALLEL for a vec_select that selects all the even or all the odd +;; elements of a vector of MODE. +(define_special_predicate "vect_par_cnst_even_or_odd_half" + (match_code "parallel") +{ + int nunits = XVECLEN (op, 0); + if (!known_eq (GET_MODE_NUNITS (mode), nunits * 2)) + return false; + rtx first = XVECEXP (op, 0, 0); + if (!CONST_INT_P (first)) + return false; + return (INTVAL (first) == 0 || INTVAL (first) == 1) + && aarch64_stepped_int_parallel_p (op, 2); +}) + (define_predicate "descending_int_parallel" (match_code "parallel") { |