 gcc/config/aarch64/aarch64-simd-builtins.def |  11
 gcc/config/aarch64/aarch64-simd.md           | 214
 gcc/config/aarch64/arm_neon.h                |  24
 gcc/config/aarch64/iterators.md              |  12
 gcc/config/aarch64/predicates.md             |  11
 5 files changed, 174 insertions(+), 98 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 94ff3f1..87af8f3 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -260,17 +260,15 @@
   BUILTIN_VDQHS (TERNOP, mls_n, 0, NONE)
   BUILTIN_VDQHS (TERNOPU, mls_n, 0, NONE)
 
-  /* Implemented by aarch64_shrn<mode>".  */
-  BUILTIN_VQN (SHIFTIMM, shrn, 0, NONE)
-  BUILTIN_VQN (USHIFTIMM, shrn, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, shrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, shrn_n, 0, NONE)
 
   /* Implemented by aarch64_shrn2<mode>.  */
   BUILTIN_VQN (SHIFT2IMM, shrn2, 0, NONE)
   BUILTIN_VQN (USHIFT2IMM, shrn2, 0, NONE)
 
-  /* Implemented by aarch64_rshrn<mode>".  */
-  BUILTIN_VQN (SHIFTIMM, rshrn, 0, NONE)
-  BUILTIN_VQN (USHIFTIMM, rshrn, 0, NONE)
+  BUILTIN_VQN (SHIFTIMM, rshrn_n, 0, NONE)
+  BUILTIN_VQN (USHIFTIMM, rshrn_n, 0, NONE)
 
   /* Implemented by aarch64_rshrn2<mode>.  */
   BUILTIN_VQN (SHIFT2IMM, rshrn2, 0, NONE)
@@ -470,7 +468,6 @@
   /* Implemented by aarch64_<sur>shll2_n<mode>.  */
   BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, NONE)
   BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, NONE)
-  /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>.  */
   BUILTIN_VQN (SHIFTIMM, sqshrun_n, 0, NONE)
   BUILTIN_VQN (SHIFTIMM, sqrshrun_n, 0, NONE)
   BUILTIN_VQN (SHIFTIMM, sqshrn_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3286f42..8b92981 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1896,16 +1896,6 @@
 }
 )
 
-(define_insn "*aarch64_<srn_op>shrn<mode><vczle><vczbe>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (truncate:<VNARROWQ>
-          (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
-            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
-  "TARGET_SIMD"
-  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -1958,58 +1948,6 @@
   [(set_attr "type" "neon_permute<q>")]
 )
 
-(define_expand "aarch64_shrn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-        (truncate:<VNARROWQ>
-          (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
-            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
-  "TARGET_SIMD"
-  {
-    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                     INTVAL (operands[2]));
-  }
-)
-
-(define_insn "aarch64_rshrn<mode><vczle><vczbe>_insn"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (truncate:<VNARROWQ>
-          (lshiftrt:VQN
-            (plus:VQN (match_operand:VQN 1 "register_operand" "w")
-                      (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
-  "TARGET_SIMD
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
-  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_rshrn<mode>"
-  [(match_operand:<VNARROWQ> 0 "register_operand")
-   (match_operand:VQN 1 "register_operand")
-   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
-      {
-        rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
-        emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
-      }
-    else
-      {
-        rtx shft
-          = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                               HOST_WIDE_INT_1U
-                                                << (INTVAL (operands[2]) - 1));
-        operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                         INTVAL (operands[2]));
-        emit_insn (gen_aarch64_rshrn<mode>_insn (operands[0], operands[1],
-                                                 operands[2], shft));
-      }
-    DONE;
-  }
-)
-
 (define_insn "aarch64_shrn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
         (vec_concat:<VNARROWQ2>
@@ -6727,31 +6665,153 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+(define_insn "*aarch64_<shrn_op>shrn_n<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ>
-          [(match_operand:VQN 1 "register_operand" "w")
-           (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-         VQSHRN_N))]
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:VQN
+            (match_operand:VQN 1 "register_operand" "w")
+            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
   "TARGET_SIMD"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
+(define_expand "aarch64_<shrn_op>shrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:VQN
+            (match_operand:VQN 1 "register_operand")
+            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                                                     INTVAL (operands[2]));
+  }
+)
+
+(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
-                            (match_operand:SI 2
-                              "aarch64_simd_shift_imm_offset_<ve_mode>")]
-                           VQSHRN_N))]
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:<V2XWIDE>
+            (plus:<V2XWIDE>
+              (<TRUNCEXTEND>:<V2XWIDE>
+                (match_operand:VQN 1 "register_operand" "w"))
+              (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (ALL_TRUNC:<VNARROWQ>
+          (<TRUNC_SHIFT>:<V2XWIDE>
+            (plus:<V2XWIDE>
+              (<TRUNCEXTEND>:<V2XWIDE>
+                (match_operand:VQN 1 "register_operand"))
+              (match_dup 3))
+            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
+  "TARGET_SIMD"
+  {
+    if (<CODE> == TRUNCATE
+        && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+        rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
+        emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
+        DONE;
+      }
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+    operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+  }
+)
+
+(define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+        (truncate:<VNARROWQ>
+          (smin:VQN
+            (smax:VQN
+              (ashiftrt:VQN
+                (match_operand:VQN 1 "register_operand" "w")
+                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+              (match_operand:VQN 3 "aarch64_simd_imm_zero"))
+            (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
+  "TARGET_SIMD"
+  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ>
+          (smin:VQN
+            (smax:VQN
+              (ashiftrt:VQN
+                (match_operand:VQN 1 "register_operand")
+                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+              (match_dup 3))
+            (match_dup 4))))]
   "TARGET_SIMD"
   {
     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                      INTVAL (operands[2]));
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
-                                                           operands[1],
-                                                           operands[2]));
-    DONE;
+    operands[3] = CONST0_RTX (<MODE>mode);
+    operands[4]
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+  }
+)
+
+(define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+        (truncate:<VNARROWQ>
+          (smin:<V2XWIDE>
+            (smax:<V2XWIDE>
+              (ashiftrt:<V2XWIDE>
+                (plus:<V2XWIDE>
+                  (sign_extend:<V2XWIDE>
+                    (match_operand:VQN 1 "register_operand" "w"))
+                  (match_operand:<V2XWIDE> 3 "aarch64_simd_rsra_rnd_imm_vec"))
+                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+              (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
+            (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
+  "TARGET_SIMD
+   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
+  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun_n<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ>
+          (smin:<V2XWIDE>
+            (smax:<V2XWIDE>
+              (ashiftrt:<V2XWIDE>
+                (plus:<V2XWIDE>
+                  (sign_extend:<V2XWIDE>
+                    (match_operand:VQN 1 "register_operand"))
+                  (match_dup 3))
+                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
+              (match_dup 4))
+            (match_dup 5))))]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
+    operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
+    operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+    operands[4] = CONST0_RTX (<V2XWIDE>mode);
+    operands[5]
+      = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
+    operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
   }
 )
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0bb9839..2a46a31 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4755,42 +4755,42 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s16 (int16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv8hi (__a, __b);
+  return __builtin_aarch64_shrn_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s32 (int32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv4si (__a, __b);
+  return __builtin_aarch64_shrn_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_s64 (int64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv2di (__a, __b);
+  return __builtin_aarch64_shrn_nv2di (__a, __b);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv8hi_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv8hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv4si_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv4si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_shrnv2di_uus (__a, __b);
+  return __builtin_aarch64_shrn_nv2di_uus (__a, __b);
 }
 
 __extension__ extern __inline int32x4_t
@@ -5574,42 +5574,42 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s16 (int16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv8hi (__a, __b);
+  return __builtin_aarch64_rshrn_nv8hi (__a, __b);
 }
 
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s32 (int32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv4si (__a, __b);
+  return __builtin_aarch64_rshrn_nv4si (__a, __b);
 }
 
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_s64 (int64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv2di (__a, __b);
+  return __builtin_aarch64_rshrn_nv2di (__a, __b);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u16 (uint16x8_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv8hi_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv8hi_uus (__a, __b);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u32 (uint32x4_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv4si_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv4si_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_n_u64 (uint64x2_t __a, const int __b)
 {
-  return __builtin_aarch64_rshrnv2di_uus (__a, __b);
+  return __builtin_aarch64_rshrn_nv2di_uus (__a, __b);
 }
 
 __extension__ extern __inline uint32x2_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 56ce125..e8c62c8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2254,6 +2254,8 @@
 ;; Signed and unsigned saturating truncations.
 (define_code_iterator SAT_TRUNC [ss_truncate us_truncate])
 
+(define_code_iterator ALL_TRUNC [ss_truncate us_truncate truncate])
+
 ;; SVE integer unary operations.
 (define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount
                                      (ss_abs "TARGET_SVE2")
@@ -2339,6 +2341,10 @@
 (define_code_attr SHIFTEXTEND [(ashiftrt "sign_extend")
                                (lshiftrt "zero_extend")])
 
+(define_code_attr TRUNCEXTEND [(ss_truncate "sign_extend")
+                               (us_truncate "zero_extend")
+                               (truncate "zero_extend")])
+
 ;; For comparison operators we use the FCM* and CM* instructions.
 ;; As there are no CMLE or CMLT instructions which act on 3 vector
 ;; operands, we must use CMGE or CMGT and swap the order of the
@@ -2428,6 +2434,12 @@
                              (ss_minus "sign_extend")
                              (us_minus "zero_extend")])
 
+(define_code_attr TRUNC_SHIFT [(ss_truncate "ashiftrt")
+                               (us_truncate "lshiftrt") (truncate "lshiftrt")])
+
+(define_code_attr shrn_op [(ss_truncate "sq")
+                           (us_truncate "uq") (truncate "")])
+
 ;; Whether a shift is left or right.
 (define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 9391aba..b31ba6e 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -599,10 +599,17 @@
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op,
                         (HOST_WIDE_INT_1U
-                         << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
+                          << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1,
                         (HOST_WIDE_INT_1U
-                         << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
+                          << (GET_MODE_UNIT_BITSIZE (mode) / 2)) - 1)")))
 
+(define_predicate "aarch64_simd_umax_quarter_mode"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op,
+                        (HOST_WIDE_INT_1U
+                         << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1,
+                        (HOST_WIDE_INT_1U
+                         << (GET_MODE_UNIT_BITSIZE (mode) / 4)) - 1)")))
 (define_predicate "aarch64_simd_shift_imm_vec_qi"
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)")))
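For context (not part of the commit), the intrinsics routed through the renamed builtins keep their documented semantics; the new define_insn bodies merely spell those semantics out in RTL instead of an unspec: a plain truncating right shift for SHRN, the same shift with the rounding constant 1 << (shift - 1) added in wider precision for RSHRN, and an extra smax/smin clamp against the narrow type's range for the saturating SQSHRUN/SQRSHRUN forms. A minimal sketch of that correspondence, written for this illustration (file name, test values and the scalar reference model are assumptions, not taken from the patch):

/* narrow.c -- illustrative only; build on an AArch64 target, e.g. "gcc -O2 narrow.c".  */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* SQRSHRUN as the new pattern describes it: widen, add the rounding
   constant 1 << (n - 1), arithmetic shift right, clamp to [0, 0xff]
   (smax with zero, smin with the narrow unsigned max), then truncate.  */
static uint8_t
ref_sqrshrun (int16_t a, int n)
{
  int32_t v = ((int32_t) a + (1 << (n - 1))) >> n;
  if (v < 0)
    v = 0;
  if (v > 0xff)
    v = 0xff;
  return (uint8_t) v;
}

int
main (void)
{
  int16_t in[8] = { -300, -9, -8, 0, 7, 8, 1000, 32767 };
  int16x8_t a = vld1q_s16 (in);

  int8_t plain[8], round[8];
  uint8_t usat[8];

  vst1_s8 (plain, vshrn_n_s16 (a, 4));	  /* expands via __builtin_aarch64_shrn_nv8hi  */
  vst1_s8 (round, vrshrn_n_s16 (a, 4));	  /* expands via __builtin_aarch64_rshrn_nv8hi */
  vst1_u8 (usat, vqrshrun_n_s16 (a, 4));  /* the sqrshrun_n pattern                    */

  for (int i = 0; i < 8; i++)
    printf ("%6d -> shrn %4d  rshrn %4d  sqrshrun %3u (ref %3u)\n",
	    in[i], plain[i], round[i], (unsigned) usat[i],
	    (unsigned) ref_sqrshrun (in[i], 4));
  return 0;
}

The rshrn_n expander above also shows the one special case kept from the old code: when the truncate variant shifts by the full element width, the result is emitted as RADDHN against a shared zero instead of a rounding shift.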