author:    Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-10 10:44:30 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-10 10:44:30 +0100
commit:    e7fe650692d532551ea066a378af25b3ca207bb1
tree:      7b8ecb73e58cf1a04f8d22ff6ddc15e2bb213210
parent:    d1e7f9993084b87e6676a5ccef3c8b7f807a6013
aarch64: Simplify QSHRN expanders and patterns
This patch deletes the explicit BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN patterns for the QSHRN instructions in favour of annotating a single pattern with <vczle><vczbe>. This also allows the expander to be simplified.
Tests are added to ensure that we still optimise away the concat-with-zero use case.
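For reference, each test boils down to the concat-with-zero shape below, a standalone sketch distilled from the new test file (the function name is illustrative):

#include <arm_neon.h>

int8x16_t
narrow_then_zero_high (int16x8_t a)
{
  /* The 64-bit SQSHRN result lands in the low half of the q-register and
     the architecture zeroes the high half, so the vcombine with zeros
     should cost no extra instruction.  */
  int8x8_t zeros = vcreate_s8 (0);
  return vcombine_s8 (vqshrn_n_s16 (a, 3), zeros);
}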
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le): Delete.
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be): Delete.
(aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>): New define_insn.
(aarch64_<sur>q<r>shr<u>n_n<mode>): Simplify expander.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/pr99195_5.c: New test.
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                 44
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c  40
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9ad0489..c1d51e3 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6569,28 +6569,13 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (unspec:<VNARROWQ>
-	    [(match_operand:VQN 1 "register_operand" "w")
-	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-	    VQSHRN_N)
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-	(vec_concat:<VNARROWQ2>
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
-	  (unspec:<VNARROWQ>
-	    [(match_operand:VQN 1 "register_operand" "w")
-	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-	    VQSHRN_N)))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn<vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+	(unspec:<VNARROWQ>
+	  [(match_operand:VQN 1 "register_operand" "w")
+	   (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
+	  VQSHRN_N))]
+  "TARGET_SIMD"
   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
@@ -6605,18 +6590,9 @@
 {
   operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						    INTVAL (operands[2]));
-  rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
-		operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-  else
-    emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
-		operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
-
-  /* The intrinsic expects a narrow result, so emit a subreg that will get
-     optimized away as appropriate.  */
-  emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-					       <VNARROWQ2>mode));
+  emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn (operands[0],
+							operands[1],
+							operands[2]));
   DONE;
 }
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
new file mode 100644
index 0000000..a07f821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_5.c
@@ -0,0 +1,40 @@
+/* PR target/99195.  */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and no explicit zeroing instructions
+   are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT,IMT,OP,IS,OS)			\
+OT							\
+foo_##OP##_##OS (IT a)					\
+{							\
+  IMT zeros = vcreate_##OS (0);				\
+  return vcombine_##OS (v##OP##_##IS (a, 3), zeros);	\
+}
+
+#define FUNC(OT,IT,IMT,IS,OS)		\
+MYOP (OT, IT, IMT, qshrn_n, IS, OS)	\
+MYOP (OT, IT, IMT, qrshrn_n, IS, OS)
+
+#define FUNCUN(OT,IT,IMT,IS,OS)		\
+MYOP (OT, IT, IMT, qshrun_n, IS, OS)	\
+MYOP (OT, IT, IMT, qrshrun_n, IS, OS)
+
+FUNC (int8x16_t, int16x8_t, int8x8_t, s16, s8)
+FUNC (int16x8_t, int32x4_t, int16x4_t, s32, s16)
+FUNC (int32x4_t, int64x2_t, int32x2_t, s64, s32)
+FUNCUN (uint8x16_t, int16x8_t, uint8x8_t, s16, u8)
+FUNCUN (uint16x8_t, int32x4_t, uint16x4_t, s32, u16)
+FUNCUN (uint32x4_t, int64x2_t, uint32x2_t, s64, u32)
+
+FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
+FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
+FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
+
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
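For the s16 -> s8 variant, for instance, the whole foo_qshrn_n_s8 function should compile down to the narrowing shift alone, with no mov/fmov left to zero the upper half (a sketch of the anticipated output, not captured from a compiler run):

foo_qshrn_n_s8:
	sqshrn	v0.8b, v0.8h, #3
	ret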