author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>    2023-05-10 10:40:06 +0100
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>    2023-05-10 10:42:22 +0100
commit     d1e7f9993084b87e6676a5ccef3c8b7f807a6013 (patch)
tree       d23b8af6329899110564f65cae7e610bcc05c6b0
parent     39d6d4256d16d676f8b9031c4d1d115ddf4ad76b (diff)
aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
This patch cleans up some almost-duplicate patterns for the XTN, SQXTN, and UQXTN instructions.
Using the <vczle><vczbe> attributes, we can remove the separate BYTES_BIG_ENDIAN and !BYTES_BIG_ENDIAN cases,
as well as the intrinsic expanders that select between the two.
Tests are also added. Thankfully the diffstat comes out negative \O/.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
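
For illustration only (the function and variable names below are not from the patch), the vec-concat-zero idiom these annotations target looks like this at the intrinsics level: a narrowing operation whose result is combined with an explicit zero vector. With the <vczle>/<vczbe>-annotated patterns this is expected to match directly, without the endian-specific *_insn_le/*_insn_be duplicates or the expander that chose between them.

#include <arm_neon.h>

/* Illustrative sketch, not part of the patch: saturating narrow with the
   high half zeroed explicitly.  The aim is for this to assemble to a single
   sqxtn with no extra fmov/mov clearing the upper lanes.  */
int16x8_t
saturating_narrow_zero_high (int32x4_t a)
{
  int16x4_t lo = vqmovn_s32 (a);      /* SQXTN: narrow 32-bit lanes to 16-bit.  */
  int16x4_t zeros = vcreate_s16 (0);  /* Explicit zero for the upper half.  */
  return vcombine_s16 (lo, zeros);    /* vec_concat (lo, 0).  */
}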
gcc/ChangeLog:
PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
(aarch64_xtn<mode>_insn_be): Likewise.
(trunc<mode><Vnarrowq>2): Rename to...
(trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
(aarch64_xtn<mode>): Move under the above. Just emit the truncate RTL.
(aarch64_<su>qmovn<mode>): Likewise.
(aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
(aarch64_<su>qmovn<mode>_insn_le): Delete.
(aarch64_<su>qmovn<mode>_insn_be): Likewise.
gcc/testsuite/ChangeLog:
PR target/99195
* gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.
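
As a rough guide to what the new tests exercise (hand-expanded here for one instance; the expansion itself is not part of the patch), the added MYOP/FUNC macros generate unary-narrowing functions of the following shape, and the scan-assembler-not directives then check that no stray fmov/mov is emitted to zero the upper half:

#include <arm_neon.h>

/* Hand expansion of MYOP (uint8x16_t, uint16x8_t, uint8x8_t, movn, u16, u8)
   from the new test: the vmovn result is combined with zeros, which the
   annotated XTN pattern should handle in a single instruction.  */
uint8x16_t
foo_movn_u8 (uint16x8_t a)
{
  uint8x8_t zeros = vcreate_u8 (0);
  return vcombine_u8 (vmovn_u16 (a), zeros);
}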
-rw-r--r--   gcc/config/aarch64/aarch64-simd.md                 99
-rw-r--r--   gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c  22
2 files changed, 37 insertions, 84 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4603888..9ad0489 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1743,47 +1743,6 @@
 ;; Narrowing operations.
 
-(define_insn "aarch64_xtn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
-  "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
-                                                CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
-                                                CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
@@ -5300,7 +5259,7 @@
 
 ;; sqmovn and uqmovn
 
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (match_operand:SD_HSDI 1 "register_operand" "w")))]
@@ -5309,48 +5268,13 @@
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-        (vec_concat:<VNARROWQ2>
-          (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-          (SAT_TRUNC:<VNARROWQ>
-            (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
-          (match_operand:VQN 1 "register_operand")))]
+          (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
-                                                      CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
-                                                      CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                 <VNARROWQ2>mode));
-    DONE;
-  }
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
@@ -9281,7 +9205,7 @@
 )
 
 ;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
@@ -9289,6 +9213,15 @@
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {}
+)
+
 (define_insn "aarch64_bfdot<mode>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b..6127cb2 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b) \
   return vcombine_##OS (v##OP##_##IS (a, b), zeros); \
 }
 
-
 #define FUNC(OT,IT,IMT,IS,OS) \
 MYOP (OT, IT, IMT, addhn, IS, OS) \
 MYOP (OT, IT, IMT, subhn, IS, OS) \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
 FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
 FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
 
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS) \
+OT \
+foo_##OP##_##OS (IT a) \
+{ \
+  IMT zeros = vcreate_##OS (0); \
+  return vcombine_##OS (v##OP##_##IS (a), zeros); \
+}
+
+#undef FUNC
+#define FUNC(OP) \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8) \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16) \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32) \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8) \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16) \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32) \
+
+FUNC (movn)
+FUNC (qmovn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } } */
 /* { dg-final { scan-assembler-not {\tmov\t} } } */