diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-24 14:52:34 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-24 14:52:34 +0100 |
commit | b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e (patch) | |
tree | 988f788df90e8cefae1af39ee3bb47c7a644a532 | |
parent | affee7dcfa1ee272d43ac7cb68cf423dbd956fd8 (diff) | |
download | gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.zip gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.tar.gz gcc-b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e.tar.bz2 |
aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero
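Continuing the series of straightforward annotations, this patch adds the <vczle><vczbe> vec-concat-zero annotations to the normal (neither widening nor narrowing) vector shift patterns, so that a 64-bit shift whose result is combined with a zero upper half needs no explicit zeroing instruction.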
Tests included.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_simd_lshr<mode>): Rename to...
(aarch64_simd_lshr<mode><vczle><vczbe>): ... This.
(aarch64_simd_ashr<mode>): Rename to...
(aarch64_simd_ashr<mode><vczle><vczbe>): ... This.
(aarch64_simd_imm_shl<mode>): Rename to...
(aarch64_simd_imm_shl<mode><vczle><vczbe>): ... This.
(aarch64_simd_reg_sshl<mode>): Rename to...
(aarch64_simd_reg_sshl<mode><vczle><vczbe>): ... This.
(aarch64_simd_reg_shl<mode>_unsigned): Rename to...
(aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>): ... This.
(aarch64_simd_reg_shl<mode>_signed): Rename to...
(aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>): ... This.
(vec_shr_<mode>): Rename to...
(vec_shr_<mode><vczle><vczbe>): ... This.
(aarch64_<sur>shl<mode>): Rename to...
(aarch64_<sur>shl<mode><vczle><vczbe>): ... This.
(aarch64_<sur>q<r>shl<mode>): Rename to...
(aarch64_<sur>q<r>shl<mode><vczle><vczbe>): ... This.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_1.c: Add testing for shifts.
* gcc.target/aarch64/simd/pr99195_6.c: Likewise.
* gcc.target/aarch64/simd/pr99195_8.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c | 39 |
4 files changed, 61 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index af95bbb..0df9731 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1280,7 +1280,7 @@ DONE; }) -(define_insn "aarch64_simd_lshr<mode>" +(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] @@ -1289,7 +1289,7 @@ [(set_attr "type" "neon_shift_imm<q>")] ) -(define_insn "aarch64_simd_ashr<mode>" +(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w") (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))] @@ -1312,7 +1312,7 @@ [(set_attr "type" "neon_shift_acc<q>")] ) -(define_insn "aarch64_simd_imm_shl<mode>" +(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] @@ -1321,7 +1321,7 @@ [(set_attr "type" "neon_shift_imm<q>")] ) -(define_insn "aarch64_simd_reg_sshl<mode>" +(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")))] @@ -1330,7 +1330,7 @@ [(set_attr "type" "neon_shift_reg<q>")] ) -(define_insn "aarch64_simd_reg_shl<mode>_unsigned" +(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")] @@ -1340,7 +1340,7 @@ [(set_attr "type" "neon_shift_reg<q>")] ) -(define_insn "aarch64_simd_reg_shl<mode>_signed" +(define_insn 
"aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")] @@ -1522,7 +1522,7 @@ ) ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. -(define_insn "vec_shr_<mode>" +(define_insn "vec_shr_<mode><vczle><vczbe>" [(set (match_operand:VD 0 "register_operand" "=w") (unspec:VD [(match_operand:VD 1 "register_operand" "w") (match_operand:SI 2 "immediate_operand" "i")] @@ -6340,7 +6340,7 @@ ;; vshl -(define_insn "aarch64_<sur>shl<mode>" +(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>" [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") @@ -6354,7 +6354,7 @@ ;; vqshl -(define_insn "aarch64_<sur>q<r>shl<mode>" +(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c index fde501d..8b6548a 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c @@ -65,9 +65,9 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2) -OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2) -OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2) +OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl) +OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl) +OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl) 
OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c index 52ad270..c86506e 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c @@ -25,6 +25,16 @@ MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16) MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32) MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64) +MYOP (uint8x16_t, uint8x8_t, int8x8_t, shl, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, shl, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, shl, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, shl, u64) + +MYOP (uint8x16_t, uint8x8_t, int8x8_t, qshl, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, qshl, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, qshl, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, qshl, u64) + /* { dg-final { scan-assembler-not {\tfmov\t} } } */ /* { dg-final { scan-assembler-not {\tmov\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c new file mode 100644 index 0000000..29499e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c @@ -0,0 +1,39 @@ +/* PR target/99195. */ +/* Check that we take advantage of 64-bit Advanced SIMD operations clearing + the top half of the vector register and no explicit zeroing instructions + are emitted. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +#define MYOP(OT,IT1,OP,S,OS) \ +OT \ +foo_##OP##_##S##OS (IT1 a) \ +{ \ + IT1 zeros = vcreate_##S##OS (0); \ + return vcombine_##S##OS (v##OP##_##S##OS (a, 3), zeros); \ +} \ +OT \ +foo_##OP##_##S##OS##_s (IT1 a) \ +{ \ + IT1 zeros = vcreate_##S##OS (0); \ + return vcombine_##S##OS (v##OP##_##S##OS (a, OS - 1), zeros); \ +} + +MYOP (int8x16_t, int8x8_t, shr_n, s, 8) +MYOP (int16x8_t, int16x4_t, shr_n, s, 16) +MYOP (int32x4_t, int32x2_t, shr_n, s, 32) +MYOP (uint8x16_t, uint8x8_t, shr_n, u, 8) +MYOP (uint16x8_t, uint16x4_t, shr_n, u, 16) +MYOP (uint32x4_t, uint32x2_t, shr_n, u, 32) +MYOP (int8x16_t, int8x8_t, shl_n, s, 8) +MYOP (int16x8_t, int16x4_t, shl_n, s, 16) +MYOP (int32x4_t, int32x2_t, shl_n, s, 32) +MYOP (uint8x16_t, uint8x8_t, shl_n, u, 8) +MYOP (uint16x8_t, uint16x4_t, shl_n, u, 16) +MYOP (uint32x4_t, uint32x2_t, shl_n, u, 32) + +/* { dg-final { scan-assembler-not {\tfmov\t} } } */ +/* { dg-final { scan-assembler-not {\tmov\t} } } */ + |