diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-01-21 14:06:16 +0000 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2021-01-21 14:08:29 +0000 |
commit | 43705f3fa343e08b2fb030460fc5e2a969954398 (patch) | |
tree | 6cf42099225170d07bb9518c6e8c6a663c8f4a24 /gcc | |
parent | 279d3a89b79f85d07a8ac4db1bebe9f60cb549e5 (diff) | |
download | gcc-43705f3fa343e08b2fb030460fc5e2a969954398.zip gcc-43705f3fa343e08b2fb030460fc5e2a969954398.tar.gz gcc-43705f3fa343e08b2fb030460fc5e2a969954398.tar.bz2 |
aarch64: Use canonical RTL for sqdmlal patterns
The aarch64_sqdml<SBINQOPS:as>l patterns are of the form:
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(SBINQOPS:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 2 "register_operand" "w"))
(sign_extend:<VWIDE>
(match_operand:VSD_HSI 3 "register_operand" "w")))
(const_int 1))))]
where SBINQOPS is either ss_plus or ss_minus. The problem is that for the
ss_plus case the RTL is not canonical: the (match_operand 1) should be the
second arm of the PLUS.
I've seen this manifest in combine missing some legitimate
simplifications because it generates
the canonical ss_plus form and fails to match the pattern.
This patch splits the patterns into the ss_plus and ss_minus forms with
the canonical form for each.
I've seen this improve my testcase (which I can't include as it's too
large and not easy to test reliably).
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_sqdml<SBINQOPS:as>l<mode>):
Split into...
(aarch64_sqdmlal<mode>): ... This...
(aarch64_sqdmlsl<mode>): ... And this.
(aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Split into...
(aarch64_sqdmlal_lane<mode>): ... This...
(aarch64_sqdmlsl_lane<mode>): ... And this.
(aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Split into...
(aarch64_sqdmlsl_laneq<mode>): ... This...
(aarch64_sqdmlal_laneq<mode>): ... And this.
(aarch64_sqdml<SBINQOPS:as>l_n<mode>): Split into...
(aarch64_sqdmlsl_n<mode>): ... This...
(aarch64_sqdmlal_n<mode>): ... And this.
(aarch64_sqdml<SBINQOPS:as>l2<mode>_internal): Split into...
(aarch64_sqdmlal2<mode>_internal): ... This...
(aarch64_sqdmlsl2<mode>_internal): ... And this.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 193 |
1 files changed, 172 insertions, 21 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d7acd72..be2a5a86 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4155,9 +4155,25 @@ ;; vqdml[sa]l -(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" +(define_insn "aarch64_sqdmlal<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_long")] +) + +(define_insn "aarch64_sqdmlsl<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4167,15 +4183,39 @@ (match_operand:VSD_HSI 3 "register_operand" "w"))) (const_int 1))))] "TARGET_SIMD" - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" [(set_attr "type" "neon_sat_mla_<Vetype>_long")] ) ;; vqdml[sa]l_lane -(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" +(define_insn "aarch64_sqdmlal_lane<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); + return + 
"sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4192,14 +4232,15 @@ { operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) -(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" + +(define_insn "aarch64_sqdmlsl_laneq<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4216,14 +4257,62 @@ { operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) -(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" +(define_insn "aarch64_sqdmlal_laneq<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); + return + "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" 
"neon_sat_mla_<Vetype>_scalar_long")] +) + + +(define_insn "aarch64_sqdmlal_lane<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); + return + "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4239,14 +4328,38 @@ { operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) -(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" + +(define_insn "aarch64_sqdmlal_laneq<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); + return + "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + } + [(set_attr "type" 
"neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_laneq<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4262,16 +4375,16 @@ { operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); return - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; } [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) ;; vqdml[sa]l_n -(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>" +(define_insn "aarch64_sqdmlsl_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4282,15 +4395,53 @@ (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" - "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlal_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend:<VWIDE> + (vec_duplicate:VD_HSI + (match_operand:<VEL> 3 "register_operand" "<vwx>")))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) + ;; sqdml[as]l2 -(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal" +(define_insn "aarch64_sqdmlal2<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (SBINQOPS:<VWIDE> + (ss_plus:<VWIDE> + (ss_ashift:<VWIDE> + (mult:<VWIDE> + (sign_extend:<VWIDE> + (vec_select:<VHALF> + 
(match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1)) + (match_operand:<VWIDE> 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] +) + +(define_insn "aarch64_sqdmlsl2<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> @@ -4304,7 +4455,7 @@ (match_dup 4)))) (const_int 1))))] "TARGET_SIMD" - "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" + "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] ) |