author    Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2023-05-10 10:44:30 +0100
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>  2023-05-10 10:44:30 +0100
commit    e7fe650692d532551ea066a378af25b3ca207bb1 (patch)
tree      7b8ecb73e58cf1a04f8d22ff6ddc15e2bb213210 /gcc/config
parent    d1e7f9993084b87e6676a5ccef3c8b7f807a6013 (diff)
aarch64: Simplify QSHRN expanders and patterns
This patch deletes the explicit BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN
patterns for the QSHRN instructions in favour of annotating a single one
with <vczle><vczbe>.  This allows simplification of the expander too.
Tests are added to ensure that we still optimise away the
concat-with-zero use case.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): Delete.
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): Delete.
	(aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>): New define_insn.
	(aarch64_<sur>q<r>shr<u>n_n<mode>): Simplify expander.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/pr99195_5.c: New test.
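The concat-with-zero use case the message refers to looks roughly like the
sketch below.  This is a hypothetical illustration using standard arm_neon.h
intrinsics, not the contents of the new pr99195_5.c test; the function name
and shift amount are made up.

/* A saturating narrowing shift whose 64-bit result is combined with a
   zero upper half.  Writing a D register already zeroes the top 64 bits
   of the Q register, so the <vczle>/<vczbe> variants of the new insn let
   combine absorb the vcombine: ideally only the sqshrn remains.  */
#include <arm_neon.h>

int16x8_t
narrow_and_zero_high (int32x4_t x)
{
  int16x4_t lo = vqshrn_n_s32 (x, 5);        /* sqshrn  v0.4h, v0.4s, #5 */
  return vcombine_s16 (lo, vdup_n_s16 (0));  /* upper half expected free */
}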
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 44
1 file changed, 10 insertions(+), 34 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ad0489..c1d51e3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6569,28 +6569,13 @@
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (unspec:<VNARROWQ>
- [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- VQSHRN_N)
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
- "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
- [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (vec_concat:<VNARROWQ2>
- (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
- (unspec:<VNARROWQ>
- [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
- VQSHRN_N)))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (unspec:<VNARROWQ>
+ [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N))]
+ "TARGET_SIMD"
"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
[(set_attr "type" "neon_shift_imm_narrow_q")]
)
@@ -6605,18 +6590,9 @@
{
operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
INTVAL (operands[2]));
- rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
- operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
- else
- emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
- operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
- /* The intrinsic expects a narrow result, so emit a subreg that will get
- optimized away as appropriate. */
- emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
- <VNARROWQ2>mode));
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
+ operands[1],
+ operands[2]));
DONE;
}
)