aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-10 11:50:01 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-10 11:50:01 +0100
commit: c8977cf5f2daa9fecfc5d67a737506d0d31c578a (patch)
tree: 2572afc1ff65c49bb98f0e1d83649459b7731a28
parent: e7fe650692d532551ea066a378af25b3ca207bb1 (diff)
download: gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.zip
download: gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.gz
download: gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.bz2
aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero
Moving on to the saturating instructions, this one goes through the simple add/sub ones.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

    PR target/99195
    * config/aarch64/aarch64-simd.md (aarch64_<su_optab>q<addsub><mode>): Rename to...
    (aarch64_<su_optab>q<addsub><mode><vczle><vczbe>): ... This.
    (aarch64_<sur>qadd<mode>): Rename to...
    (aarch64_<sur>qadd<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

    PR target/99195
    * gcc.target/aarch64/simd/pr99195_1.c: Add testing for qadd, qsub.
    * gcc.target/aarch64/simd/pr99195_6.c: New test.
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 4
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 18
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 30
3 files changed, 41 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c1d51e3..dc6efa0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5236,7 +5236,7 @@
)
;; <su>q<addsub>
-(define_insn "aarch64_<su_optab>q<addsub><mode>"
+(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
(match_operand:VSDQ_I 2 "register_operand" "w")))]
@@ -5247,7 +5247,7 @@
;; suqadd and usqadd
-(define_insn "aarch64_<sur>qadd<mode>"
+(define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
[(set (match_operand:VSDQ_I 0 "register_operand" "=w")
(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
(match_operand:VSDQ_I 2 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 5801598..4e6b341 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -57,17 +57,17 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14)
-#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17) \
-OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
-OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17)
+#define OPNINETEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17,OP18,OP19) \
+OPEIGHT (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7, OP8) \
+OPELEVEN (T, IS, OS, S, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17, OP18, OP19)
-OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
-OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
+OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
new file mode 100644
index 0000000..52ad270
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -0,0 +1,30 @@
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+ the top half of the vector register and no explicit zeroing instructions
+ are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,IT2,OP,OS) \
+OT \
+foo_##OP##_##OS (IT1 a, IT2 b) \
+{ \
+ IT1 zeros = vcreate_##OS (0); \
+ return vcombine_##OS (v##OP##_##OS (a, b), zeros); \
+}
+
+MYOP (int8x16_t, int8x8_t, uint8x8_t, uqadd, s8)
+MYOP (int16x8_t, int16x4_t, uint16x4_t, uqadd, s16)
+MYOP (int32x4_t, int32x2_t, uint32x2_t, uqadd, s32)
+MYOP (int64x2_t, int64x1_t, uint64x1_t, uqadd, s64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, sqadd, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+