author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-06-06 09:56:52 +0100
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-06-06 09:56:52 +0100
commit     d2cdfafde2060ba2a96b01da223fe33c67439b94
tree       5235c6b2496f6361832be64d1fc783ff4daac4e1
parent     b327cbe8f4eefc91ee2bea49a1da7128adf30281
aarch64: Simplify SHRN, RSHRN expanders and patterns
Now that we have the <vczle><vczbe> annotations, we can get rid of the explicit
!BYTES_BIG_ENDIAN and BYTES_BIG_ENDIAN patterns for the narrowing shift instructions.
This allows us to clean up the expanders as well.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
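For context, these are the instructions behind the ACLE narrowing-shift intrinsics. A minimal sketch of the C-level view (standard arm_neon.h intrinsics; the function names here are illustrative, not from the patch):

```c
#include <arm_neon.h>

/* shrn: truncating narrowing shift right, 32-bit lanes to 16-bit lanes.
   Expected to compile to a single "shrn v0.4h, v0.4s, #5".  */
int16x4_t
narrow_shift (int32x4_t x)
{
  return vshrn_n_s32 (x, 5);
}

/* rshrn: the rounding variant, which adds 1 << (5 - 1) before shifting.
   Expected to compile to a single "rshrn v0.4h, v0.4s, #5".  */
int16x4_t
narrow_round_shift (int32x4_t x)
{
  return vrshrn_n_s32 (x, 5);
}
```

Both produce a 64-bit result; the <vczle><vczbe> machinery exists so that the 128-bit forms, where the high half is explicitly zero, are matched in either endianness without dedicated _le/_be patterns.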
gcc/ChangeLog:
	* config/aarch64/aarch64-simd.md (aarch64_shrn<mode>_insn_le): Delete.
	(aarch64_shrn<mode>_insn_be): Delete.
	(*aarch64_<srn_op>shrn<mode>_vect): Rename to...
	(*aarch64_<srn_op>shrn<mode><vczle><vczbe>): ... This.
	(aarch64_shrn<mode>): Remove reference to the above deleted patterns.
	(aarch64_rshrn<mode>_insn_le): Delete.
	(aarch64_rshrn<mode>_insn_be): Delete.
	(aarch64_rshrn<mode><vczle><vczbe>_insn): New define_insn.
	(aarch64_rshrn<mode>): Remove references to the above deleted patterns.
gcc/testsuite/ChangeLog:
	* gcc.target/aarch64/simd/pr99195_5.c: Add testing for shrn_n, rshrn_n
	intrinsics.
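The pr99195 family of tests guards against redundant zeroing of the upper half after narrowing operations. A hypothetical hand-expansion of one new MYOP instance (the real test generates these via macros; the names and shift amount here are illustrative):

```c
#include <arm_neon.h>

/* Combining the narrowed result with a zero upper half should fold
   into the narrowing instruction itself, with no extra mov/fmov.  */
int16x8_t
foo_shrn_n_s16 (int32x4_t a)
{
  return vcombine_s16 (vshrn_n_s32 (a, 3), vdup_n_s16 (0));
}
```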
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                | 91
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c |  4
2 files changed, 14 insertions, 81 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3b79e24..f7cf39f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1884,31 +1884,7 @@
 }
 )
 
-(define_insn "aarch64_shrn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (truncate:<VNARROWQ>
-	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
-	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_shrn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
-	  (truncate:<VNARROWQ>
-	    (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
-	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "*aarch64_<srn_op>shrn<mode>_vect"
+(define_insn "*aarch64_<srn_op>shrn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
 	(truncate:<VNARROWQ>
 	  (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
@@ -1979,48 +1955,17 @@
 {
   operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 						   INTVAL (operands[2]));
-  rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
-  else
-    emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
-				operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-  /* The intrinsic expects a narrow result, so emit a subreg that will get
-     optimized away as appropriate.  */
-  emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-					       <VNARROWQ2>mode));
-  DONE;
 }
 )
 
-(define_insn "aarch64_rshrn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (truncate:<VNARROWQ>
-	    (lshiftrt:VQN
-	      (plus:VQN (match_operand:VQN 1 "register_operand" "w")
-			(match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-	  (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
-  "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_rshrn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")
-	  (truncate:<VNARROWQ>
-	    (lshiftrt:VQN
-	      (plus:VQN (match_operand:VQN 1 "register_operand" "w")
-			(match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN
+(define_insn "aarch64_rshrn<mode><vczle><vczbe>_insn"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(truncate:<VNARROWQ>
+	  (lshiftrt:VQN
+	    (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+		      (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+	    (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
+  "TARGET_SIMD
    && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
       == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
@@ -2044,24 +1989,10 @@
 	= aarch64_simd_gen_const_vector_dup (<MODE>mode,
 					     HOST_WIDE_INT_1U
 					     << (INTVAL (operands[2]) - 1));
-      rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
       operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 						       INTVAL (operands[2]));
-      if (BYTES_BIG_ENDIAN)
-	emit_insn (
-		gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
-						 operands[2], shft,
-						 CONST0_RTX (<VNARROWQ>mode)));
-      else
-	emit_insn (
-		gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
-						 operands[2], shft,
-						 CONST0_RTX (<VNARROWQ>mode)));
-
-      /* The intrinsic expects a narrow result, so emit a subreg that will
-	 get optimized away as appropriate.  */
-      emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-						   <VNARROWQ2>mode));
+      emit_insn (gen_aarch64_rshrn<mode>_insn (operands[0], operands[1],
+					       operands[2], shft));
     }
   DONE;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
index a07f821..d114324 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
@@ -17,7 +17,9 @@ foo_##OP##_##OS (IT a) \
 
 #define FUNC(OT,IT,IMT,IS,OS) \
 MYOP (OT, IT, IMT, qshrn_n, IS, OS) \
-MYOP (OT, IT, IMT, qrshrn_n, IS, OS)
+MYOP (OT, IT, IMT, qrshrn_n, IS, OS) \
+MYOP (OT, IT, IMT, shrn_n, IS, OS) \
+MYOP (OT, IT, IMT, rshrn_n, IS, OS)
 
 #define FUNCUN(OT,IT,IMT,IS,OS) \
 MYOP (OT, IT, IMT, qshrun_n, IS, OS) \
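As a footnote on the retained pattern condition: the unified rshrn insn still requires that the vector addend (operand 3) equal 1 << (shift - 1), i.e. the round-to-nearest bias. A scalar model of that behaviour (illustrative only, not code from the patch):

```c
#include <stdint.h>

/* Scalar model of rshrn's rounding: bias by 1 << (n - 1), then shift.
   For n == 5 the bias is 16, so 47 becomes (47 + 16) >> 5 == 1,
   i.e. 47/32 rounded to nearest.  */
static inline int16_t
rshrn_model (int32_t x, unsigned n)
{
  return (int16_t) ((x + (1 << (n - 1))) >> n);
}
```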