diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-30 10:41:02 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-30 10:41:02 +0100 |
commit | 365b1d5493988b6bd40183d1fe49bd8a3b32a6bb (patch) | |
tree | 9f201d77c1ef7c1962eb80da093aa0d983206813 | |
parent | a4dae58abe1a3961aece740b0fada995750c277c (diff) | |
download | gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.zip gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.tar.gz gcc-365b1d5493988b6bd40183d1fe49bd8a3b32a6bb.tar.bz2 |
aarch64: Convert ADDLP and ADALP patterns to standard RTL codes
This patch converts the patterns for the integer widen and pairwise-add instructions
to standard RTL operations. The pairwise addition within a vector can be represented
as an addition of two vec_selects, one selecting the even elements, and one selecting odd.
Thus for the intrinsic vpaddlq_s8 we can generate:
(set (reg:V8HI 92)
(plus:V8HI (vec_select:V8HI (sign_extend:V16HI (reg/v:V16QI 93 [ a ]))
(parallel [
(const_int 0 [0])
(const_int 2 [0x2])
(const_int 4 [0x4])
(const_int 6 [0x6])
(const_int 8 [0x8])
(const_int 10 [0xa])
(const_int 12 [0xc])
(const_int 14 [0xe])
]))
(vec_select:V8HI (sign_extend:V16HI (reg/v:V16QI 93 [ a ]))
(parallel [
(const_int 1 [0x1])
(const_int 3 [0x3])
(const_int 5 [0x5])
(const_int 7 [0x7])
(const_int 9 [0x9])
(const_int 11 [0xb])
(const_int 13 [0xd])
(const_int 15 [0xf])
]))))
Similarly for the accumulating forms where there's an extra outer PLUS for the accumulation.
We already have the handy helper functions aarch64_stepped_int_parallel_p and
aarch64_gen_stepped_int_parallel defined in aarch64.cc that we can make use of to define
the right predicate for the VEC_SELECT PARALLEL.
This patch allows us to remove some code iterators and the UNSPEC definitions for SADDLP and UADDLP.
UNSPEC_UADALP and UNSPEC_SADALP are retained because they are still used by SVE2 patterns.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_<sur>adalp<mode>): Delete.
(aarch64_<su>adalp<mode>): New define_expand.
(*aarch64_<su>adalp<mode><vczle><vczbe>_insn): New define_insn.
(aarch64_<su>addlp<mode>): Convert to define_expand.
(*aarch64_<su>addlp<mode><vczle><vczbe>_insn): New define_insn.
* config/aarch64/iterators.md (UNSPEC_SADDLP, UNSPEC_UADDLP): Delete.
(ADALP): Likewise.
(USADDLP): Likewise.
* config/aarch64/predicates.md (vect_par_cnst_even_or_odd_half): Define.
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 69 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 15 |
3 files changed, 74 insertions, 19 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c4171ed..2d87ed7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1039,13 +1039,39 @@ } ) -(define_insn "aarch64_<sur>adalp<mode>" +(define_expand "aarch64_<su>adalp<mode>" + [(set (match_operand:<VDBLW> 0 "register_operand") + (plus:<VDBLW> + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 2 "register_operand")) + (match_dup 3)) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2)) + (match_dup 4))) + (match_operand:<VDBLW> 1 "register_operand")))] + "TARGET_SIMD" + { + int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2; + operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2); + operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2); + } +) + +(define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn" [(set (match_operand:<VDBLW> 0 "register_operand" "=w") - (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w") - (match_operand:<VDBLW> 1 "register_operand" "0")] - ADALP))] - "TARGET_SIMD" - "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>" + (plus:<VDBLW> + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 2 "register_operand" "w")) + (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" "")) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2)) + (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" ""))) + (match_operand:<VDBLW> 1 "register_operand" "0")))] + "TARGET_SIMD + && !rtx_equal_p (operands[3], operands[4])" + "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>" [(set_attr "type" "neon_reduc_add<q>")] ) @@ -3699,11 +3725,34 @@ [(set_attr "type" "neon_reduc_add<VDQV_L:q>")] ) -(define_insn "aarch64_<su>addlp<mode>" - [(set (match_operand:<VDBLW> 0 "register_operand" "=w") - (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")] - USADDLP))] +(define_expand "aarch64_<su>addlp<mode>" + [(set (match_operand:<VDBLW> 
0 "register_operand") + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 1 "register_operand")) + (match_dup 2)) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1)) + (match_dup 3))))] "TARGET_SIMD" + { + int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2; + operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2); + operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2); + } +) + +(define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn" + [(set (match_operand:<VDBLW> 0 "register_operand" "=w") + (plus:<VDBLW> + (vec_select:<VDBLW> + (ANY_EXTEND:<V2XWIDE> + (match_operand:VDQV_L 1 "register_operand" "w")) + (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half")) + (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1)) + (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))] + "TARGET_SIMD + && !rtx_equal_p (operands[2], operands[3])" "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>" [(set_attr "type" "neon_reduc_add<q>")] ) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index cf49f00..d9c7354 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -667,8 +667,6 @@ UNSPEC_SSHLL ; Used in aarch64-simd.md. UNSPEC_USHLL ; Used in aarch64-simd.md. UNSPEC_ADDP ; Used in aarch64-simd.md. - UNSPEC_SADDLP ; Used in aarch64-simd.md. - UNSPEC_UADDLP ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_TBX ; Used in vector permute patterns. UNSPEC_CONCAT ; Used in vector permute patterns. @@ -2581,9 +2579,6 @@ ;; Int Iterators. ;; ------------------------------------------------------------------- -;; The unspec codes for the SADALP, UADALP AdvancedSIMD instructions. 
-(define_int_iterator ADALP [UNSPEC_SADALP UNSPEC_UADALP]) - (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV UNSPEC_SMAXV UNSPEC_SMINV]) @@ -2594,8 +2589,6 @@ (define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) -(define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP]) - (define_int_iterator USADDLV [UNSPEC_SADDLV UNSPEC_UADDLV]) (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) @@ -3339,8 +3332,6 @@ ;; "s" for signed operations and "u" for unsigned ones. (define_int_attr su [(UNSPEC_SADDV "s") (UNSPEC_UADDV "u") - (UNSPEC_SADDLP "s") - (UNSPEC_UADDLP "u") (UNSPEC_SADDLV "s") (UNSPEC_UADDLV "u") (UNSPEC_UNPACKSHI "s") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 0ce3741..3cbc735 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -460,6 +460,21 @@ return aarch64_simd_check_vect_par_cnst_half (op, mode, false); }) +;; PARALLEL for a vec_select that selects all the even or all the odd +;; elements of a vector of MODE. +(define_special_predicate "vect_par_cnst_even_or_odd_half" + (match_code "parallel") +{ + int nunits = XVECLEN (op, 0); + if (!known_eq (GET_MODE_NUNITS (mode), nunits * 2)) + return false; + rtx first = XVECEXP (op, 0, 0); + if (!CONST_INT_P (first)) + return false; + return (INTVAL (first) == 0 || INTVAL (first) == 1) + && aarch64_stepped_int_parallel_p (op, 2); +}) + (define_predicate "descending_int_parallel" (match_code "parallel") { |