aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-01-21 14:06:16 +0000
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2021-01-21 14:08:29 +0000
commit43705f3fa343e08b2fb030460fc5e2a969954398 (patch)
tree6cf42099225170d07bb9518c6e8c6a663c8f4a24 /gcc
parent279d3a89b79f85d07a8ac4db1bebe9f60cb549e5 (diff)
downloadgcc-43705f3fa343e08b2fb030460fc5e2a969954398.zip
gcc-43705f3fa343e08b2fb030460fc5e2a969954398.tar.gz
gcc-43705f3fa343e08b2fb030460fc5e2a969954398.tar.bz2
aarch64: Use canonical RTL for sqdmlal patterns
The aarch64_sqdml<SBINQOPS:as>l patterns are of the form: [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (SBINQOPS:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VSD_HSI 2 "register_operand" "w")) (sign_extend:<VWIDE> (match_operand:VSD_HSI 3 "register_operand" "w"))) (const_int 1))))] where SBINQOPS is ss_plus and ss_minus. The problem is that for the ss_plus case the RTL is not canonical: the (match_oprand 1) should be the second arm of the PLUS. I've seen this manifest in combine missing some legitimate simplifications because it generates the canonical ss_plus form and fails to match the pattern. This patch splits the patterns into the ss_plus and ss_minus forms with the canonical form for each. I've seen this improve my testcase (which I can't include as it's too large and not easy to test reliably). gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_sqdml<SBINQOPS:as>l<mode>): Split into... (aarch64_sqdmlal<mode>): ... This... (aarch64_sqdmlsl<mode>): ... And this. (aarch64_sqdml<SBINQOPS:as>l_lane<mode>): Split into... (aarch64_sqdmlal_lane<mode>): ... This... (aarch64_sqdmlsl_lane<mode>): ... And this. (aarch64_sqdml<SBINQOPS:as>l_laneq<mode>): Split into... (aarch64_sqdmlsl_laneq<mode>): ... This... (aarch64_sqdmlal_laneq<mode>): ... And this. (aarch64_sqdml<SBINQOPS:as>l_n<mode>): Split into... (aarch64_sqdmlsl_n<mode>): ... This... (aarch64_sqdmlal_n<mode>): ... And this. (aarch64_sqdml<SBINQOPS:as>l2<mode>_internal): Split into... (aarch64_sqdmlal2<mode>_internal): ... This... (aarch64_sqdmlsl2<mode>_internal): ... And this.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md193
1 files changed, 172 insertions, 21 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d7acd72..be2a5a86 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4155,9 +4155,25 @@
;; vqdml[sa]l
-(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
+(define_insn "aarch64_sqdmlal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VSD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (match_operand:VSD_HSI 3 "register_operand" "w")))
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
+ [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
+)
+
+(define_insn "aarch64_sqdmlsl<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4167,15 +4183,39 @@
(match_operand:VSD_HSI 3 "register_operand" "w")))
(const_int 1))))]
"TARGET_SIMD"
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
;; vqdml[sa]l_lane
-(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
+(define_insn "aarch64_sqdmlal_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ ))
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
+ return
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmlsl_lane<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4192,14 +4232,15 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
-(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
+
+(define_insn "aarch64_sqdmlsl_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4216,14 +4257,62 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
-(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
+(define_insn "aarch64_sqdmlal_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ ))
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
+ return
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+
+(define_insn "aarch64_sqdmlal_lane<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ )
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
+ return
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmlsl_lane<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4239,14 +4328,38 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
return
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
-(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
+
+(define_insn "aarch64_sqdmlal_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:SD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
+ )
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ {
+ operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
+ return
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ }
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmlsl_laneq<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4262,16 +4375,16 @@
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
return
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
;; vqdml[sa]l_n
-(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
+(define_insn "aarch64_sqdmlsl_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4282,15 +4395,53 @@
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))]
"TARGET_SIMD"
- "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
+ "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmlal_n<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (sign_extend:<VWIDE>
+ (vec_duplicate:VD_HSI
+ (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
+
;; sqdml[as]l2
-(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
+(define_insn "aarch64_sqdmlal2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (SBINQOPS:<VWIDE>
+ (ss_plus:<VWIDE>
+ (ss_ashift:<VWIDE>
+ (mult:<VWIDE>
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (sign_extend:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 3 "register_operand" "w")
+ (match_dup 4))))
+ (const_int 1))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
+ [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
+)
+
+(define_insn "aarch64_sqdmlsl2<mode>_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
@@ -4304,7 +4455,7 @@
(match_dup 4))))
(const_int 1))))]
"TARGET_SIMD"
- "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
+ "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
[(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)