diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-24 14:52:34 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-24 14:52:34 +0100 |
commit | b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e (patch) | |
tree | 988f788df90e8cefae1af39ee3bb47c7a644a532 /gcc/testsuite | |
parent | affee7dcfa1ee272d43ac7cb68cf423dbd956fd8 (diff) | |
download | gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.zip gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.tar.gz gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.tar.bz2 |
aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero
Continuing the series of straightforward annotations, this one handles the normal (not widening or narrowing) vector shifts.
Tests included.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_simd_lshr<mode>): Rename to...
(aarch64_simd_lshr<mode><vczle><vczbe>): ... This.
(aarch64_simd_ashr<mode>): Rename to...
(aarch64_simd_ashr<mode><vczle><vczbe>): ... This.
(aarch64_simd_imm_shl<mode>): Rename to...
(aarch64_simd_imm_shl<mode><vczle><vczbe>): ... This.
(aarch64_simd_reg_sshl<mode>): Rename to...
(aarch64_simd_reg_sshl<mode><vczle><vczbe>): ... This.
(aarch64_simd_reg_shl<mode>_unsigned): Rename to...
(aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>): ... This.
(aarch64_simd_reg_shl<mode>_signed): Rename to...
(aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>): ... This.
(vec_shr_<mode>): Rename to...
(vec_shr_<mode><vczle><vczbe>): ... This.
(aarch64_<sur>shl<mode>): Rename to...
(aarch64_<sur>shl<mode><vczle><vczbe>): ... This.
(aarch64_<sur>q<r>shl<mode>): Rename to...
(aarch64_<sur>q<r>shl<mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add testing for shifts.
* gcc.target/aarch64/simd/pr99195_6.c: Likewise.
* gcc.target/aarch64/simd/pr99195_8.c: New test.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c | 39 |
3 files changed, 52 insertions, 3 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index fde501d..8b6548a 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -65,9 +65,9 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn
 OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
-OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2)
-OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2)
-OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2)
+OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
 
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
index 52ad270..c86506e 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c
@@ -25,6 +25,16 @@ MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16)
 MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32)
 MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64)
 
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, shl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, shl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, shl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, shl, u64)
+
+MYOP (uint8x16_t, uint8x8_t, int8x8_t, qshl, u8)
+MYOP (uint16x8_t, uint16x4_t, int16x4_t, qshl, u16)
+MYOP (uint32x4_t, uint32x2_t, int32x2_t, qshl, u32)
+MYOP (uint64x2_t, uint64x1_t, int64x1_t, qshl, u64)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } } */
 /* { dg-final { scan-assembler-not {\tmov\t} } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
new file mode 100644
index 0000000..29499e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c
@@ -0,0 +1,39 @@
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and no explicit zeroing instructions
+   are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,OP,S,OS) \
+OT \
+foo_##OP##_##S##OS (IT1 a) \
+{ \
+  IT1 zeros = vcreate_##S##OS (0); \
+  return vcombine_##S##OS (v##OP##_##S##OS (a, 3), zeros); \
+} \
+OT \
+foo_##OP##_##S##OS##_s (IT1 a) \
+{ \
+  IT1 zeros = vcreate_##S##OS (0); \
+  return vcombine_##S##OS (v##OP##_##S##OS (a, OS - 1), zeros); \
+}
+
+MYOP (int8x16_t, int8x8_t, shr_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shr_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shr_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shr_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shr_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shr_n, u, 32)
+MYOP (int8x16_t, int8x8_t, shl_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shl_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shl_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shl_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shl_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shl_n, u, 32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+