diff options
author | Jonathan Wright <jonathan.wright@arm.com> | 2021-07-16 15:34:38 +0100 |
---|---|---|
committer | Jonathan Wright <jonathan.wright@arm.com> | 2021-07-27 10:42:33 +0100 |
commit | 3bc9db6a989671bedf19e61bd1b21f79588e99da (patch) | |
tree | d658c0f489f3895ab00b4a1351a40394b55306c1 /gcc | |
parent | d88a6951586c7229b25708f4486eaaf4bf4b5bbe (diff) | |
download | gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.zip gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.gz gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.bz2 |
simplify-rtx: Push sign/zero-extension inside vec_duplicate
As a general principle, vec_duplicate should be as close to the root
of an expression as possible. Where unary operations have
vec_duplicate as an argument, these operations should be pushed
inside the vec_duplicate.
This patch modifies unary operation simplification to push
sign/zero-extension of a scalar inside vec_duplicate.
This patch also updates all RTL patterns in aarch64-simd.md to use
the new canonical form.
gcc/ChangeLog:
2021-07-19 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd.md: Push sign/zero-extension
inside vec_duplicate for all patterns.
* simplify-rtx.c (simplify_context::simplify_unary_operation_1):
Push sign/zero-extension inside vec_duplicate.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 359 | ||||
-rw-r--r-- | gcc/simplify-rtx.c | 35 |
2 files changed, 211 insertions, 183 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 13c8698..c5638d0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2079,14 +2079,16 @@ (define_insn "aarch64_<su>mlal_hi_n<mode>_insn" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (plus:<VWIDE> - (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND> - (match_operand:<VEL> 4 "register_operand" "<h_con>")))) - (match_operand:<VWIDE> 1 "register_operand" "0")))] + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (match_operand:<VEL> 4 "register_operand" "<h_con>")))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]" [(set_attr "type" "neon_mla_<Vetype>_long")] @@ -2154,14 +2156,16 @@ (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (minus:<VWIDE> - (match_operand:<VWIDE> 1 "register_operand" "0") - (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND> - (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))] + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))] "TARGET_SIMD" "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]" 
[(set_attr "type" "neon_mla_<Vetype>_long")] @@ -2197,14 +2201,14 @@ (define_insn "aarch64_<su>mlal_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (plus:<VWIDE> - (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> - (match_operand:VD_HSI 2 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:VD_HSI - (match_operand:<VEL> 3 "register_operand" "<h_con>")))) - (match_operand:<VWIDE> 1 "register_operand" "0")))] + (plus:<VWIDE> + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (match_operand:<VEL> 3 "register_operand" "<h_con>")))) + (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]" [(set_attr "type" "neon_mla_<Vetype>_long")] @@ -2226,14 +2230,14 @@ (define_insn "aarch64_<su>mlsl_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (minus:<VWIDE> - (match_operand:<VWIDE> 1 "register_operand" "0") - (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> - (match_operand:VD_HSI 2 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:VD_HSI - (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))] + (minus:<VWIDE> + (match_operand:<VWIDE> 1 "register_operand" "0") + (mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 2 "register_operand" "w")) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))] "TARGET_SIMD" "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]" [(set_attr "type" "neon_mla_<Vetype>_long")] @@ -2311,8 +2315,8 @@ (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (match_operand:<VCOND> 1 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:<VCOND> + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> (vec_select:<VEL> (match_operand:VDQHS 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] @@ -2327,13 +2331,15 @@ (define_insn "aarch64_<su>mull_hi_lane<mode>_insn" [(set 
(match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCOND> 3 "register_operand" "<vwx>") - (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCOND> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] "TARGET_SIMD" { operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); @@ -2359,13 +2365,15 @@ (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCONQ> 3 "register_operand" "<vwx>") - (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCONQ> 3 "register_operand" "<vwx>") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))] "TARGET_SIMD" { operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); @@ -2390,11 +2398,11 @@ (define_insn "aarch64_<su>mull_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> - (match_operand:VD_HSI 1 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:<VCOND> + 
(mult:<VWIDE> + (ANY_EXTEND:<VWIDE> + (match_operand:VD_HSI 1 "register_operand" "w")) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> (match_operand:<VEL> 2 "register_operand" "<h_con>")))))] "TARGET_SIMD" "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]" @@ -2404,11 +2412,12 @@ (define_insn "aarch64_<su>mull_hi_n<mode>_insn" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) (ANY_EXTEND:<VWIDE> - (vec_duplicate:<VCOND> + (vec_select:<VHALF> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> (match_operand:<VEL> 2 "register_operand" "<h_con>")))))] "TARGET_SIMD" "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]" @@ -2435,8 +2444,8 @@ (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (match_operand:<VCOND> 2 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:<VCOND> + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> (vec_select:<VEL> (match_operand:VDQHS 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))) @@ -2453,13 +2462,15 @@ [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCOND> 4 "register_operand" "<vwx>") - (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCOND> 4 "register_operand" "<vwx>") + (parallel [(match_operand:SI 5 "immediate_operand" 
"i")]))))) (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" { @@ -2488,13 +2499,15 @@ [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCONQ> 4 "register_operand" "<vwx>") - (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCONQ> 4 "register_operand" "<vwx>") + (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))) (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" { @@ -2526,8 +2539,8 @@ (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (match_operand:<VCOND> 2 "register_operand" "w")) - (ANY_EXTEND:<VWIDE> - (vec_duplicate:<VCOND> + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> (vec_select:<VEL> (match_operand:VDQHS 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))] @@ -2544,13 +2557,15 @@ (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCOND> 4 "register_operand" "<vwx>") - (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCOND> 4 "register_operand" "<vwx>") + (parallel [(match_operand:SI 
5 "immediate_operand" "i")])))) )))] "TARGET_SIMD" { @@ -2580,13 +2595,15 @@ (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (mult:<VWIDE> - (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF> - (vec_select:<VEL> - (match_operand:<VCONQ> 4 "register_operand" "<vwx>") - (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) + (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (ANY_EXTEND:<VWIDE_S> + (vec_select:<VEL> + (match_operand:<VCONQ> 4 "register_operand" "<vwx>") + (parallel [(match_operand:SI 5 "immediate_operand" "i")])))) )))] "TARGET_SIMD" { @@ -5313,12 +5330,12 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCOND> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1)) (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" @@ -5338,12 +5355,12 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCOND> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1))))] "TARGET_SIMD" { @@ -5363,12 +5380,12 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) 
(const_int 1))))] "TARGET_SIMD" { @@ -5386,12 +5403,12 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) - )) + )) (const_int 1)) (match_operand:<VWIDE> 1 "register_operand" "0")))] "TARGET_SIMD" @@ -5507,8 +5524,8 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" @@ -5523,8 +5540,8 @@ (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 2 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1)) (match_operand:<VWIDE> 1 "register_operand" "0")))] @@ -5601,11 +5618,11 @@ (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCOND> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5622,15 +5639,15 @@ (define_insn "aarch64_sqdmlal2_lane<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_plus:<VWIDE> + (ss_plus:<VWIDE> (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - 
(sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCOND> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5648,16 +5665,16 @@ (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_minus:<VWIDE> + (ss_minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5674,15 +5691,15 @@ (define_insn "aarch64_sqdmlal2_laneq<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_plus:<VWIDE> + (ss_plus:<VWIDE> (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (vec_select:<VEL> (match_operand:<VCONQ> 3 "register_operand" "<vwx>") (parallel [(match_operand:SI 4 "immediate_operand" "i")]) @@ -5734,16 +5751,16 @@ (define_insn "aarch64_sqdmlsl2_n<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_minus:<VWIDE> + (ss_minus:<VWIDE> 
(match_operand:<VWIDE> 1 "register_operand" "0") (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1))))] "TARGET_SIMD" @@ -5753,15 +5770,15 @@ (define_insn "aarch64_sqdmlal2_n<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_plus:<VWIDE> + (ss_plus:<VWIDE> (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> - (vec_select:<VHALF> - (match_operand:VQ_HSI 2 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> + (vec_select:<VHALF> + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> (match_operand:<VEL> 3 "register_operand" "<vwx>")))) (const_int 1)) (match_operand:<VWIDE> 1 "register_operand" "0")))] @@ -5806,13 +5823,13 @@ (define_insn "aarch64_sqdmull_lane<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 1 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI - (vec_select:<VEL> + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (vec_select:<VEL> (match_operand:<VCOND> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5827,13 +5844,13 @@ (define_insn "aarch64_sqdmull_laneq<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 1 "register_operand" "w")) - 
(sign_extend:<VWIDE> - (vec_duplicate:VD_HSI - (vec_select:<VEL> + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (vec_select:<VEL> (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5890,13 +5907,13 @@ (define_insn "aarch64_sqdmull_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (match_operand:VD_HSI 1 "register_operand" "w")) - (sign_extend:<VWIDE> - (vec_duplicate:VD_HSI - (match_operand:<VEL> 2 "register_operand" "<vwx>"))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) ) (const_int 1)))] "TARGET_SIMD" @@ -5906,8 +5923,6 @@ ;; vqdmull2 - - (define_insn "aarch64_sqdmull2<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (ss_ashift:<VWIDE> @@ -5943,15 +5958,15 @@ (define_insn "aarch64_sqdmull2_lane<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> - (vec_select:<VEL> + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (vec_select:<VEL> (match_operand:<VCOND> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -5966,15 +5981,15 @@ (define_insn "aarch64_sqdmull2_laneq<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> - (vec_select:<VEL> + 
(match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (vec_select:<VEL> (match_operand:<VCONQ> 2 "register_operand" "<vwx>") (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) )) @@ -6019,15 +6034,15 @@ (define_insn "aarch64_sqdmull2_n<mode>_internal" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (ss_ashift:<VWIDE> + (ss_ashift:<VWIDE> (mult:<VWIDE> (sign_extend:<VWIDE> (vec_select:<VHALF> - (match_operand:VQ_HSI 1 "register_operand" "w") - (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) - (sign_extend:<VWIDE> - (vec_duplicate:<VHALF> - (match_operand:<VEL> 2 "register_operand" "<vwx>"))) + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (vec_duplicate:<VWIDE> + (sign_extend:<VWIDE_S> + (match_operand:<VEL> 2 "register_operand" "<vwx>"))) ) (const_int 1)))] "TARGET_SIMD" diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index fd306bf..a719f57 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -1717,22 +1717,35 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode, && vec_duplicate_p (op, &elt) && code != VEC_DUPLICATE) { - /* Try applying the operator to ELT and see if that simplifies. - We can duplicate the result if so. + if (code == SIGN_EXTEND || code == ZERO_EXTEND) + /* Enforce a canonical order of VEC_DUPLICATE wrt other unary + operations by promoting VEC_DUPLICATE to the root of the expression + (as far as possible). */ + temp = simplify_gen_unary (code, GET_MODE_INNER (mode), + elt, GET_MODE_INNER (GET_MODE (op))); + else + /* Try applying the operator to ELT and see if that simplifies. + We can duplicate the result if so. 
- The reason we don't use simplify_gen_unary is that it isn't - necessarily a win to convert things like: + The reason we traditionally haven't used simplify_gen_unary + for these codes is that it didn't necessarily seem to be a + win to convert things like: - (neg:V (vec_duplicate:V (reg:S R))) + (neg:V (vec_duplicate:V (reg:S R))) - to: + to: - (vec_duplicate:V (neg:S (reg:S R))) + (vec_duplicate:V (neg:S (reg:S R))) - The first might be done entirely in vector registers while the - second might need a move between register files. */ - temp = simplify_unary_operation (code, GET_MODE_INNER (mode), - elt, GET_MODE_INNER (GET_MODE (op))); + The first might be done entirely in vector registers while the + second might need a move between register files. + + However, there are also cases where promoting the vec_duplicate is + more efficient, and there is definite value in having a canonical + form when matching instruction patterns. We should consider + extending the simplify_gen_unary code above to more cases. */ + temp = simplify_unary_operation (code, GET_MODE_INNER (mode), + elt, GET_MODE_INNER (GET_MODE (op))); if (temp) return gen_vec_duplicate (mode, temp); } |