aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero

Moving onto the saturating instructions, this one goes through the simple add/sub ones.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_<su_optab>q<addsub><mode>): Rename to...
	(aarch64_<su_optab>q<addsub><mode><vczle><vczbe>): ... This.
	(aarch64_<sur>qadd<mode>): Rename to...
	(aarch64_<sur>qadd<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add testing for qadd, qsub.
	* gcc.target/aarch64/simd/pr99195_6.c: New test.
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-10 11:50:01 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-10 11:50:01 +0100
commit: c8977cf5f2daa9fecfc5d67a737506d0d31c578a (patch)
tree: 2572afc1ff65c49bb98f0e1d83649459b7731a28
parent: e7fe650692d532551ea066a378af25b3ca207bb1 (diff)
download: gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.zip
gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.gz
gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.bz2
3 files changed, 41 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c1d51e3..dc6efa0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5236,7 +5236,7 @@
 )
 ;; <su>q<addsub>
 
-(define_insn "aarch64_<su_optab>q<addsub><mode>"
+(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
 	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
 			(match_operand:VSDQ_I 2 "register_operand" "w")))]
@@ -5247,7 +5247,7 @@
 
 ;; suqadd and usqadd
 
-(define_insn "aarch64_<sur>qadd<mode>"
+(define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
 	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
 			(match_operand:VSDQ_I 2 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 5801598..4e6b341 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -57,17 +57,17 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
 OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
 OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14)
 
-#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17)        \
-OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7)                \
-OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17)
+#define OPNINETEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17,OP18,OP19)        \
+OPEIGHT (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7, OP8)                \
+OPELEVEN (T, IS, OS, S, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17, OP18, OP19)
 
-OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
 OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
new file mode 100644
index 0000000..52ad270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -0,0 +1,30 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,IT2,OP,OS)                         \
+OT                                              \
+foo_##OP##_##OS (IT1 a, IT2 b)                     \
+{                                               \
+  IT1 zeros = vcreate_##OS (0);                   \
+  return vcombine_##OS (v##OP##_##OS (a, b), zeros);      \
+}
+
+MYOP (int8x16_t, int8x8_t, uint8x8_t, uqadd, s8)
+MYOP (int16x8_t, int16x4_t, uint16x4_t, uqadd, s16)
+MYOP (int32x4_t, int32x2_t, uint32x2_t, uqadd, s32)
+MYOP (int64x2_t, int64x1_t, uint64x1_t, uqadd, s64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, sqadd, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-10 11:50:01 +0100
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-10 11:50:01 +0100
commit	c8977cf5f2daa9fecfc5d67a737506d0d31c578a (patch)
tree	2572afc1ff65c49bb98f0e1d83649459b7731a28
parent	e7fe650692d532551ea066a378af25b3ca207bb1 (diff)
download	gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.zip gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.gz gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.bz2