diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-10 11:50:01 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-10 11:50:01 +0100 |
commit | c8977cf5f2daa9fecfc5d67a737506d0d31c578a (patch) | |
tree | 2572afc1ff65c49bb98f0e1d83649459b7731a28 | |
parent | e7fe650692d532551ea066a378af25b3ca207bb1 (diff) | |
download | gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.zip gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.gz gcc-c8977cf5f2daa9fecfc5d67a737506d0d31c578a.tar.bz2 |
aarch64: PR target/99195 annotate simple saturating add/sub patterns for vec-concat-zero
Moving on to the saturating instructions, this patch covers the simple add/sub patterns.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_<su_optab>q<addsub><mode>):
Rename to...
(aarch64_<su_optab>q<addsub><mode><vczle><vczbe>): ... This.
(aarch64_<sur>qadd<mode>): Rename to...
(aarch64_<sur>qadd<mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add testing for qadd, qsub.
* gcc.target/aarch64/simd/pr99195_6.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 30 |
3 files changed, 41 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c1d51e3..dc6efa0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5236,7 +5236,7 @@ ) ;; <su>q<addsub> -(define_insn "aarch64_<su_optab>q<addsub><mode>" +(define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") (match_operand:VSDQ_I 2 "register_operand" "w")))] @@ -5247,7 +5247,7 @@ ;; suqadd and usqadd -(define_insn "aarch64_<sur>qadd<mode>" +(define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") (match_operand:VSDQ_I 2 "register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c index 5801598..4e6b341 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c @@ -57,17 +57,17 @@ OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11) OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ OPSEVEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14) -#define OPSEVENTEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17) \ -OPSEVEN (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ -OPTEN (T, IS, OS, S, OP8, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17) +#define OPNINETEEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11,OP12,OP13,OP14,OP15,OP16,OP17,OP18,OP19) \ +OPEIGHT (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5, OP6, OP7, OP8) \ +OPELEVEN (T, IS, OS, S, OP9, OP10, OP11, OP12, OP13, OP14, OP15, OP16, OP17, OP18, OP19) -OPSEVENTEEN (int8, 8, 16, s8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (int16, 4, 8, s16, padd, add, sub, mul, and, orr, eor, 
orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (int32, 2, 4, s32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPSEVENTEEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) +OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c new file mode 100644 index 0000000..52ad270 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c @@ -0,0 +1,30 @@ +/* PR target/99195. */ +/* Check that we take advantage of 64-bit Advanced SIMD operations clearing + the top half of the vector register and no explicit zeroing instructions + are emitted. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +#define MYOP(OT,IT1,IT2,OP,OS) \ +OT \ +foo_##OP##_##OS (IT1 a, IT2 b) \ +{ \ + IT1 zeros = vcreate_##OS (0); \ + return vcombine_##OS (v##OP##_##OS (a, b), zeros); \ +} + +MYOP (int8x16_t, int8x8_t, uint8x8_t, uqadd, s8) +MYOP (int16x8_t, int16x4_t, uint16x4_t, uqadd, s16) +MYOP (int32x4_t, int32x2_t, uint32x2_t, uqadd, s32) +MYOP (int64x2_t, int64x1_t, uint64x1_t, uqadd, s64) + +MYOP (uint8x16_t, uint8x8_t, int8x8_t, sqadd, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64) + +/* { dg-final { scan-assembler-not {\tfmov\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\t} } } */ + |