author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2023-06-06 23:35:52 +0100
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>   2023-06-16 13:52:23 +0100
commit     d20b2ad845876eec0ee80a3933ad49f9f6c4ee30 (patch)
tree       184f2b31fba4f05d4c42515a84ddd847fd7a9cba
parent     ffb87344dd343df60eafb10d510ac704f37417ca (diff)
aarch64: [US]Q(R)SHR(U)N scalar forms refactoring
Some instructions from the previous patch have scalar forms: SQSHRN, SQRSHRN,
UQSHRN, UQRSHRN, SQSHRUN, SQRSHRUN.  This patch converts the patterns for
these to use standard RTL codes.  Their MD patterns deviate slightly from the
vector forms, mostly because the operands are scalar rather than vectors.

One nuance is in the SQSHRUN, SQRSHRUN patterns.  These end in a truncate to
the scalar narrow mode e.g. SI -> QI.  The RTL passes simplify this truncate
to a subreg rather than keeping it as a truncate, so we end up representing
these patterns without the truncate and, in the expander, reading the narrow
subreg in order to comply with the expected width of the intrinsic.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shr<u>n_n<mode>):
	Rename to...
	(aarch64_<shrn_op>shrn_n<mode>): ... This.  Reimplement with RTL codes.
	(*aarch64_<shrn_op>rshrn_n<mode>_insn): New define_insn.
	(aarch64_sqrshrun_n<mode>_insn): Likewise.
	(aarch64_sqshrun_n<mode>_insn): Likewise.
	(aarch64_<shrn_op>rshrn_n<mode>): New define_expand.
	(aarch64_sqshrun_n<mode>): Likewise.
	(aarch64_sqrshrun_n<mode>): Likewise.
	* config/aarch64/iterators.md (V2XWIDE): Add HI and SI modes.
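[Editor's note: to make the semantics concrete, here is a minimal C sketch,
not part of the patch and with a made-up helper name, of what the rounding
unsigned-saturating form computes for the SI -> HI case.  The rounding addend
1 << (shift - 1) is added in a wider type, mirroring how the expanders
compute in <V2XWIDE> and then read the narrow lowpart.]

  #include <stdint.h>

  /* Hypothetical model of SQRSHRUN Hd, Sn, #shift: round, shift right,
     then saturate the signed 32-bit input to the unsigned 16-bit range
     (the <half_mask> clamp in the pattern).  */
  static uint16_t
  sqrshrun_s32 (int32_t x, int shift)
  {
    int64_t wide = (int64_t) x + ((int64_t) 1 << (shift - 1)); /* Rounding add.  */
    int64_t shifted = wide >> shift;                           /* Arithmetic shift.  */
    if (shifted < 0)
      return 0;          /* Clamp below: the (smax ... 0) in the pattern.  */
    if (shifted > 0xffff)
      return 0xffff;     /* Clamp above: the (smin ... <half_mask>).  */
    return (uint16_t) shifted;                                 /* Narrow lowpart.  */
  }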
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md  111
-rw-r--r--  gcc/config/aarch64/iterators.md       3
2 files changed, 106 insertions(+), 8 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b92981..bbb5434 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6654,15 +6654,15 @@
;; vq(r)shr(u)n_n
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_insn "aarch64_<shrn_op>shrn_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
- (match_operand:SI 2
- "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
- VQSHRN_N))]
+ (SAT_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:SD_HSDI
+ (match_operand:SD_HSDI 1 "register_operand" "w")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
"TARGET_SIMD"
- "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
- [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+ "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
)
(define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
@@ -6704,6 +6704,41 @@
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (SAT_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:<DWI>
+ (plus:<DWI>
+ (<TRUNCEXTEND>:<DWI>
+ (match_operand:SD_HSDI 1 "register_operand" "w"))
+ (match_operand:<DWI> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ "TARGET_SIMD
+ && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+ "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (SAT_TRUNC:<VNARROWQ>
+ (<TRUNC_SHIFT>:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (<TRUNCEXTEND>:<V2XWIDE>
+ (match_operand:SD_HSDI 1 "register_operand"))
+ (match_dup 3))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+ "TARGET_SIMD"
+ {
+ /* Use this expander to create the rounding constant vector, which is
+ 1 << (shift - 1). Use wide_int here to ensure that the right TImode
+ RTL is generated when handling the DImode expanders. */
+ int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+ wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+ operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+ }
+)
+
(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(ALL_TRUNC:<VNARROWQ>
@@ -6748,6 +6783,34 @@
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
+(define_insn "aarch64_sqshrun_n<mode>_insn"
+ [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
+ (smin:SD_HSDI
+ (smax:SD_HSDI
+ (ashiftrt:SD_HSDI
+ (match_operand:SD_HSDI 1 "register_operand" "w")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (const_int 0))
+ (const_int <half_mask>)))]
+ "TARGET_SIMD"
+ "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+ [(match_operand:<VNARROWQ> 0 "register_operand")
+ (match_operand:SD_HSDI 1 "register_operand")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ "TARGET_SIMD"
+ {
+ rtx dst = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
+ operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+ DONE;
+ }
+)
+
(define_expand "aarch64_sqshrun_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(truncate:<VNARROWQ>
@@ -6788,6 +6851,40 @@
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
+(define_insn "aarch64_sqrshrun_n<mode>_insn"
+ [(set (match_operand:<V2XWIDE> 0 "register_operand" "=w")
+ (smin:<V2XWIDE>
+ (smax:<V2XWIDE>
+ (ashiftrt:<V2XWIDE>
+ (plus:<V2XWIDE>
+ (sign_extend:<V2XWIDE>
+ (match_operand:SD_HSDI 1 "register_operand" "w"))
+ (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+ (const_int 0))
+ (const_int <half_mask>)))]
+ "TARGET_SIMD
+ && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+ "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+ [(match_operand:<VNARROWQ> 0 "register_operand")
+ (match_operand:SD_HSDI 1 "register_operand")
+ (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
+ "TARGET_SIMD"
+ {
+ int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+ wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+ rtx rnd = immed_wide_int_const (rnd_wi, <V2XWIDE>mode);
+ rtx dst = gen_reg_rtx (<V2XWIDE>mode);
+ emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
+ emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
+ DONE;
+ }
+)
+
(define_expand "aarch64_sqrshrun_n<mode>"
[(set (match_operand:<VNARROWQ> 0 "register_operand")
(truncate:<VNARROWQ>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e8c62c8..acc7a3e 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1532,7 +1532,8 @@
(define_mode_attr V2XWIDE [(V8QI "V8HI") (V4HI "V4SI")
(V16QI "V16HI") (V8HI "V8SI")
(V2SI "V2DI") (V4SI "V4DI")
- (V2DI "V2TI") (DI "TI")])
+ (V2DI "V2TI") (DI "TI")
+ (HI "SI") (SI "DI")])
;; Predicate mode associated with VWIDE.
(define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])
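[Editor's note: for completeness, a usage sketch of a few of the affected
arm_neon.h intrinsics, assuming an AArch64 compiler; nothing here is part of
the patch.  With the new HI and SI entries, <V2XWIDE> resolves to SI and DI
for the scalar patterns, so e.g. the s32 forms do their rounding addition in
64 bits before narrowing.]

  #include <arm_neon.h>
  #include <stdio.h>

  int
  main (void)
  {
    /* Scalar narrowing shifts covered by this patch (SI -> HI forms).  */
    int16_t  a = vqshrns_n_s32 (123456, 8);   /* SQSHRN: saturating shift right narrow.  */
    uint16_t b = vqshruns_n_s32 (123456, 8);  /* SQSHRUN: saturate to unsigned.  */
    uint16_t c = vqrshruns_n_s32 (123456, 8); /* SQRSHRUN: rounding variant.  */
    printf ("%d %u %u\n", a, b, c);
    return 0;
  }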