diff options
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-05-14 10:05:42 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-05-14 15:31:25 +0100
commit: ff3809b459db881e80f627e81ec946e7bbd7041d (patch)
tree: b02afb0aa23c6027c346c5cd5fcc94b1bb3e0889 /gcc
parent: 4206171605de65df9674a14dd9db75bf4f4ed037 (diff)
download: gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.zip gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.tar.gz gcc-ff3809b459db881e80f627e81ec946e7bbd7041d.tar.bz2
aarch64: Make sqdmlal2 patterns match canonical RTL
The sqdmlal2 patterns are hidden beneath the SBINQOPS iterator and unfortunately they don't match
canonical RTL because the simple accumulate operand comes in the first arm of the SS_PLUS.
This patch splits the SS_PLUS and SS_MINUS forms with the SS_PLUS operands set up to match
the canonical form, where the complex operand comes first.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md
(aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal): Split into...
(aarch64_sqdmlsl2_lane<mode>_internal): ... This...
(aarch64_sqdmlal2_lane<mode>_internal): ... And this.
(aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal): Split into ...
(aarch64_sqdmlsl2_laneq<mode>_internal): ... This...
(aarch64_sqdmlal2_laneq<mode>_internal): ... And this.
(aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal): Split into...
(aarch64_sqdmlsl2_n<mode>_internal): ... This...
(aarch64_sqdmlal2_n<mode>_internal): ... And this.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 89 |
1 files changed, 80 insertions, 9 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 802cca3..e59bc7b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5374,9 +5374,9 @@ ;; vqdml[sa]l2_lane -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" +(define_insn "aarch64_sqdmlsl2_lane<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5395,14 +5395,40 @@ { operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_lane<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); + return + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" +(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5421,7 +5447,33 @@ { operands[4] = 
aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_laneq<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); + return + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) @@ -5460,9 +5512,9 @@ DONE; }) -(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" +(define_insn "aarch64_sqdmlsl2_n<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -5475,7 +5527,26 @@ (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal2_n<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + 
(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_duplicate:<VHALF> + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) |