aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-04-26 15:10:18 +0100
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-04-26 15:10:18 +0100
commit80afac3121778e509e1847be90a1999236cff03a (patch)
treef68eb8add978190c840e4c503b7bfe233020467b
parentbd0791e899ea542deb6b3821b0a1d435d9754d5c (diff)
downloadgcc-80afac3121778e509e1847be90a1999236cff03a.zip
gcc-80afac3121778e509e1847be90a1999236cff03a.tar.gz
gcc-80afac3121778e509e1847be90a1999236cff03a.tar.bz2
aarch64: Reimplement RSHRN intrinsic patterns with standard RTL codes
This patch reimplements the backend patterns for the rshrn intrinsics using standard RTL codes rather than UNSPECs. We already represent shrn as a truncate of a shift. rshrn can be represented as truncate ((src + (1 << (shft - 1))) >> shft), similar to how LLVM treats it. I have a follow-up patch to do the same for the rshrn2 pattern, which will allow us to remove the UNSPEC_RSHRN entirely.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_rshrn<mode>_insn_le):
	Reimplement with standard RTL codes instead of an UNSPEC.
	(aarch64_rshrn<mode>_insn_be): Likewise.
	(aarch64_rshrn<mode>): Adjust for the above.
	* config/aarch64/predicates.md (aarch64_simd_rshrn_imm_vec):
	Define.
-rw-r--r--gcc/config/aarch64/aarch64-simd.md36
-rw-r--r--gcc/config/aarch64/predicates.md6
2 files changed, 30 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cb2223d..f891310 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1990,11 +1990,15 @@
(define_insn "aarch64_rshrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2
- "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
+ (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN
+ && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+ == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
@@ -2002,11 +2006,15 @@
(define_insn "aarch64_rshrn<mode>_insn_be"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- UNSPEC_RSHRN)))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN
+ && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+ == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
"rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
@@ -2024,18 +2032,22 @@
}
else
{
+ rtx shft
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ HOST_WIDE_INT_1U
+ << (INTVAL (operands[2]) - 1));
rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
if (BYTES_BIG_ENDIAN)
emit_insn (
gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
- operands[2],
+ operands[2], shft,
CONST0_RTX (<VNARROWQ>mode)));
else
emit_insn (
gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
- operands[2],
+ operands[2], shft,
CONST0_RTX (<VNARROWQ>mode)));
/* The intrinsic expects a narrow result, so emit a subreg that will
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3f5f4df..242f10a 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -587,6 +587,12 @@
(and (match_code "const_vector")
(match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
+(define_predicate "aarch64_simd_rshrn_imm_vec"
+ (and (match_code "const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op, 1,
+ HOST_WIDE_INT_1U
+ << (GET_MODE_UNIT_BITSIZE (mode) - 1))")))
+
(define_predicate "aarch64_simd_shift_imm_bitsize_qi"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 8)")))