aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jonathan Wright <jonathan.wright@arm.com> 2021-07-16 15:34:38 +0100
committer Jonathan Wright <jonathan.wright@arm.com> 2021-07-27 10:42:33 +0100
commit 3bc9db6a989671bedf19e61bd1b21f79588e99da (patch)
tree d658c0f489f3895ab00b4a1351a40394b55306c1 /gcc
parent d88a6951586c7229b25708f4486eaaf4bf4b5bbe (diff)
download gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.zip
gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.gz
gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.bz2
simplify-rtx: Push sign/zero-extension inside vec_duplicate
As a general principle, vec_duplicate should be as close to the root
of an expression as possible. Where unary operations have
vec_duplicate as an argument, these operations should be pushed
inside the vec_duplicate.

This patch modifies unary operation simplification to push
sign/zero-extension of a scalar inside vec_duplicate.

This patch also updates all RTL patterns in aarch64-simd.md to use
the new canonical form.

gcc/ChangeLog:

2021-07-19 Jonathan Wright <jonathan.wright@arm.com>

* config/aarch64/aarch64-simd.md: Push sign/zero-extension
inside vec_duplicate for all patterns.
* simplify-rtx.c (simplify_context::simplify_unary_operation_1):
Push sign/zero-extension inside vec_duplicate.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 359
-rw-r--r-- gcc/simplify-rtx.c 35
2 files changed, 211 insertions, 183 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 13c8698..c5638d0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2079,14 +2079,16 @@
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (plus:<VWIDE>
- (mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
- (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
- (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2154,14 +2156,16 @@
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (minus:<VWIDE>
- (match_operand:<VWIDE> 1 "register_operand" "0")
- (mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
- (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
"TARGET_SIMD"
"<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2197,14 +2201,14 @@
(define_insn "aarch64_<su>mlal_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (plus:<VWIDE>
- (mult:<VWIDE>
- (ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 2 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:VD_HSI
- (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
- (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
"<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2226,14 +2230,14 @@
(define_insn "aarch64_<su>mlsl_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (minus:<VWIDE>
- (match_operand:<VWIDE> 1 "register_operand" "0")
- (mult:<VWIDE>
- (ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 2 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:VD_HSI
- (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 2 "register_operand" "w"))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
"TARGET_SIMD"
"<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
[(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2311,8 +2315,8 @@
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 1 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
@@ -2327,13 +2331,15 @@
(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCOND> 3 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
@@ -2359,13 +2365,15 @@
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
"TARGET_SIMD"
{
operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
@@ -2390,11 +2398,11 @@
(define_insn "aarch64_<su>mull_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (mult:<VWIDE>
- (ANY_EXTEND:<VWIDE>
- (match_operand:VD_HSI 1 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VD_HSI 1 "register_operand" "w"))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
"TARGET_SIMD"
"<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
@@ -2404,11 +2412,12 @@
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
(ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
(match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
"TARGET_SIMD"
"<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
@@ -2435,8 +2444,8 @@
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 2 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
@@ -2453,13 +2462,15 @@
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCOND> 4 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
@@ -2488,13 +2499,15 @@
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
{
@@ -2526,8 +2539,8 @@
(mult:<VWIDE>
(ANY_EXTEND:<VWIDE>
(match_operand:<VCOND> 2 "register_operand" "w"))
- (ANY_EXTEND:<VWIDE>
- (vec_duplicate:<VCOND>
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
(vec_select:<VEL>
(match_operand:VDQHS 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
@@ -2544,13 +2557,15 @@
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCOND> 4 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
)))]
"TARGET_SIMD"
{
@@ -2580,13 +2595,15 @@
(minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(mult:<VWIDE>
- (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
- (vec_select:<VEL>
- (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
- (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+ (ANY_EXTEND:<VWIDE>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (ANY_EXTEND:<VWIDE_S>
+ (vec_select:<VEL>
+ (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+ (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
)))]
"TARGET_SIMD"
{
@@ -5313,12 +5330,12 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
- ))
+ ))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
@@ -5338,12 +5355,12 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
- ))
+ ))
(const_int 1))))]
"TARGET_SIMD"
{
@@ -5363,12 +5380,12 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
- ))
+ ))
(const_int 1))))]
"TARGET_SIMD"
{
@@ -5386,12 +5403,12 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])))
- ))
+ ))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
"TARGET_SIMD"
@@ -5507,8 +5524,8 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))]
"TARGET_SIMD"
@@ -5523,8 +5540,8 @@
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 2 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
@@ -5601,11 +5618,11 @@
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5622,15 +5639,15 @@
(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_plus:<VWIDE>
+ (ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCOND> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5648,16 +5665,16 @@
(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_minus:<VWIDE>
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5674,15 +5691,15 @@
(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_plus:<VWIDE>
+ (ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(vec_select:<VEL>
(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
(parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5734,16 +5751,16 @@
(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_minus:<VWIDE>
+ (ss_minus:<VWIDE>
(match_operand:<VWIDE> 1 "register_operand" "0")
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))))]
"TARGET_SIMD"
@@ -5753,15 +5770,15 @@
(define_insn "aarch64_sqdmlal2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_plus:<VWIDE>
+ (ss_plus:<VWIDE>
(ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
- (vec_select:<VHALF>
- (match_operand:VQ_HSI 2 "register_operand" "w")
- (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
+ (vec_select:<VHALF>
+ (match_operand:VQ_HSI 2 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
(match_operand:<VEL> 3 "register_operand" "<vwx>"))))
(const_int 1))
(match_operand:<VWIDE> 1 "register_operand" "0")))]
@@ -5806,13 +5823,13 @@
(define_insn "aarch64_sqdmull_lane<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
- (vec_select:<VEL>
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
@@ -5827,13 +5844,13 @@
(define_insn "aarch64_sqdmull_laneq<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
- (vec_select:<VEL>
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
@@ -5890,13 +5907,13 @@
(define_insn "aarch64_sqdmull_n<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(match_operand:VD_HSI 1 "register_operand" "w"))
- (sign_extend:<VWIDE>
- (vec_duplicate:VD_HSI
- (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (match_operand:<VEL> 2 "register_operand" "<vwx>")))
)
(const_int 1)))]
"TARGET_SIMD"
@@ -5906,8 +5923,6 @@
;; vqdmull2
-
-
(define_insn "aarch64_sqdmull2<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ss_ashift:<VWIDE>
@@ -5943,15 +5958,15 @@
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
- (vec_select:<VEL>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (vec_select:<VEL>
(match_operand:<VCOND> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
@@ -5966,15 +5981,15 @@
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
- (vec_select:<VEL>
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")])))
))
@@ -6019,15 +6034,15 @@
(define_insn "aarch64_sqdmull2_n<mode>_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (ss_ashift:<VWIDE>
+ (ss_ashift:<VWIDE>
(mult:<VWIDE>
(sign_extend:<VWIDE>
(vec_select:<VHALF>
- (match_operand:VQ_HSI 1 "register_operand" "w")
- (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
- (sign_extend:<VWIDE>
- (vec_duplicate:<VHALF>
- (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+ (match_operand:VQ_HSI 1 "register_operand" "w")
+ (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+ (vec_duplicate:<VWIDE>
+ (sign_extend:<VWIDE_S>
+ (match_operand:<VEL> 2 "register_operand" "<vwx>")))
)
(const_int 1)))]
"TARGET_SIMD"
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index fd306bf..a719f57 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1717,22 +1717,35 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
&& vec_duplicate_p (op, &elt)
&& code != VEC_DUPLICATE)
{
- /* Try applying the operator to ELT and see if that simplifies.
- We can duplicate the result if so.
+ if (code == SIGN_EXTEND || code == ZERO_EXTEND)
+ /* Enforce a canonical order of VEC_DUPLICATE wrt other unary
+ operations by promoting VEC_DUPLICATE to the root of the expression
+ (as far as possible). */
+ temp = simplify_gen_unary (code, GET_MODE_INNER (mode),
+ elt, GET_MODE_INNER (GET_MODE (op)));
+ else
+ /* Try applying the operator to ELT and see if that simplifies.
+ We can duplicate the result if so.
- The reason we don't use simplify_gen_unary is that it isn't
- necessarily a win to convert things like:
+ The reason we traditionally haven't used simplify_gen_unary
+ for these codes is that it didn't necessarily seem to be a
+ win to convert things like:
- (neg:V (vec_duplicate:V (reg:S R)))
+ (neg:V (vec_duplicate:V (reg:S R)))
- to:
+ to:
- (vec_duplicate:V (neg:S (reg:S R)))
+ (vec_duplicate:V (neg:S (reg:S R)))
- The first might be done entirely in vector registers while the
- second might need a move between register files. */
- temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
- elt, GET_MODE_INNER (GET_MODE (op)));
+ The first might be done entirely in vector registers while the
+ second might need a move between register files.
+
+ However, there are also cases where promoting the vec_duplicate is
+ more efficient, and there is definite value in having a canonical
+ form when matching instruction patterns. We should consider
+ extending the simplify_gen_unary code above to more cases. */
+ temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
+ elt, GET_MODE_INNER (GET_MODE (op)));
if (temp)
return gen_vec_duplicate (mode, temp);
}