diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-10 12:00:17 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-10 12:01:45 +0100 |
commit | 3ed5677bb61b334a2d01c769859cdd3279e12a07 (patch) | |
tree | e2cf0377218d9236d0759d9affafeb3199f0dd5c | |
parent | c8977cf5f2daa9fecfc5d67a737506d0d31c578a (diff) | |
download | gcc-3ed5677bb61b334a2d01c769859cdd3279e12a07.zip gcc-3ed5677bb61b334a2d01c769859cdd3279e12a07.tar.gz gcc-3ed5677bb61b334a2d01c769859cdd3279e12a07.tar.bz2 |
[PATCH] aarch64: PR target/99195 annotate simple permutation patterns for vec-concat-zero
Another straightforward patch annotating patterns for the zip1, zip2, uzp1, uzp2, rev* instructions, plus tests.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_<PERMUTE:perm_insn><mode>):
Rename to...
(aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>): ... This.
(aarch64_rev<REVERSE:rev_op><mode>): Rename to...
(aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add tests for zip and rev
intrinsics.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 20 |
2 files changed, 16 insertions, 8 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dc6efa0..500d92c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7990,7 +7990,7 @@
 ;; This instruction's pattern is generated directly by
 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
 ;; need corresponding changes there.
-(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
+(define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
                           (match_operand:VALL_F16 2 "register_operand" "w")]
@@ -8022,7 +8022,7 @@
 ;; This instruction's pattern is generated directly by
 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
 ;; need corresponding changes there.
-(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
+(define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                     REVERSE))]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 4e6b341..fde501d 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -65,10 +65,18 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn
 OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
+OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2)
+OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2)
+OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2)
+
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint32, 2, 4, u32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
+OPFOUR (uint8, 8, 16, u8, zip1, zip2, uzp1, uzp2)
+OPFOUR (uint16, 4, 8, u16, zip1, zip2, uzp1, uzp2)
+OPFOUR (uint32, 2, 4, u32, zip1, zip2, uzp1, uzp2)
+
 OPFOURTEEN (float32, 2, 4, f32, add, padd, sub, mul, div, max, maxnm, min, minnm, abd, pmax, pmin, pmaxnm, pminnm)
 
 #define UNARY(OT,IT,OP,S) \
@@ -81,13 +89,13 @@ foo_##IT##OP##_##S (IT a) \
 #undef FUNC
 #define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
 
-OPSEVEN (int8, 8, 16, s8, neg, abs, rbit, clz, cls, cnt, mvn)
-OPFIVE (int16, 4, 8, s16, neg, abs, clz, cls, mvn)
-OPFIVE (int32, 2, 4, s32, neg, abs, clz, cls, mvn)
+OPTEN (int8, 8, 16, s8, neg, abs, rbit, clz, cls, cnt, mvn, rev16, rev32, rev64)
+OPSEVEN (int16, 4, 8, s16, neg, abs, clz, cls, mvn, rev32, rev64)
+OPSIX (int32, 2, 4, s32, neg, abs, clz, cls, mvn, rev64)
 
-OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
-OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
-OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
+OPEIGHT (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn, rev16, rev32, rev64)
+OPFIVE (uint16, 4, 8, u16, clz, cls, mvn, rev32, rev64)
+OPFOUR (uint32, 2, 4, u32, clz, cls, mvn, rev64)
 
 OPTEN (float32, 2, 4, f32, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
 /* { dg-final { scan-assembler-not {\tfmov\t} } } */