simplify-rtx: Push sign/zero-extension inside vec_duplicate

As a general principle, vec_duplicate should be as close to the root
of an expression as possible. Where unary operations have
vec_duplicate as an argument, these operations should be pushed
inside the vec_duplicate.

This patch modifies unary operation simplification to push
sign/zero-extension of a scalar inside vec_duplicate.

This patch also updates all RTL patterns in aarch64-simd.md to use
the new canonical form.

gcc/ChangeLog:

2021-07-19  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd.md: Push sign/zero-extension
	inside vec_duplicate for all patterns.
	* simplify-rtx.c (simplify_context::simplify_unary_operation_1):
	Push sign/zero-extension inside vec_duplicate.
author: Jonathan Wright <jonathan.wright@arm.com> 2021-07-16 15:34:38 +0100
committer: Jonathan Wright <jonathan.wright@arm.com> 2021-07-27 10:42:33 +0100
commit: 3bc9db6a989671bedf19e61bd1b21f79588e99da (patch)
tree: d658c0f489f3895ab00b4a1351a40394b55306c1 /gcc
parent: d88a6951586c7229b25708f4486eaaf4bf4b5bbe (diff)
download: gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.zip
gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.gz
gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.bz2
2 files changed, 211 insertions, 183 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 13c8698..c5638d0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2079,14 +2079,16 @@
 
 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (plus:<VWIDE>
-          (mult:<VWIDE>
-              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-                 (match_operand:VQ_HSI 2 "register_operand" "w")
-                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-              (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
-	               (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
-          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+	(plus:<VWIDE>
+	  (mult:<VWIDE>
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))
+	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2154,14 +2156,16 @@
 
 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (minus:<VWIDE>
-          (match_operand:<VWIDE> 1 "register_operand" "0")
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-              (match_operand:VQ_HSI 2 "register_operand" "w")
-              (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-            (ANY_EXTEND:<VWIDE> (vec_duplicate:<VCOND>
-	            (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
+	(minus:<VWIDE>
+	  (match_operand:<VWIDE> 1 "register_operand" "0")
+	  (mult:<VWIDE>
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
   "TARGET_SIMD"
   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2197,14 +2201,14 @@
 
 (define_insn "aarch64_<su>mlal_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (plus:<VWIDE>
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w"))
-            (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-	              (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
-          (match_operand:<VWIDE> 1 "register_operand" "0")))]
+	(plus:<VWIDE>
+	  (mult:<VWIDE>
+	    (ANY_EXTEND:<VWIDE>
+	      (match_operand:VD_HSI 2 "register_operand" "w"))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))
+	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2226,14 +2230,14 @@
 
 (define_insn "aarch64_<su>mlsl_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (minus:<VWIDE>
-          (match_operand:<VWIDE> 1 "register_operand" "0")
-          (mult:<VWIDE>
-            (ANY_EXTEND:<VWIDE>
-              (match_operand:VD_HSI 2 "register_operand" "w"))
-            (ANY_EXTEND:<VWIDE>
-              (vec_duplicate:VD_HSI
-	              (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
+	(minus:<VWIDE>
+	  (match_operand:<VWIDE> 1 "register_operand" "0")
+	  (mult:<VWIDE>
+	    (ANY_EXTEND:<VWIDE>
+	      (match_operand:VD_HSI 2 "register_operand" "w"))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
   "TARGET_SIMD"
   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
   [(set_attr "type" "neon_mla_<Vetype>_long")]
@@ -2311,8 +2315,8 @@
 	(mult:<VWIDE>
 	  (ANY_EXTEND:<VWIDE>
 	    (match_operand:<VCOND> 1 "register_operand" "w"))
-	  (ANY_EXTEND:<VWIDE>
-	    (vec_duplicate:<VCOND>
+	  (vec_duplicate:<VWIDE>
+	    (ANY_EXTEND:<VWIDE_S>
 	      (vec_select:<VEL>
 		(match_operand:VDQHS 2 "register_operand" "<vwx>")
 		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
@@ -2327,13 +2331,15 @@
 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(mult:<VWIDE>
-	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	    (match_operand:VQ_HSI 1 "register_operand" "w")
-	    (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
-	  (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	    (vec_select:<VEL>
-	      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
-	      (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+	  (ANY_EXTEND:<VWIDE>
+	    (vec_select:<VHALF>
+	      (match_operand:VQ_HSI 1 "register_operand" "w")
+	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+	  (vec_duplicate:<VWIDE>
+	    (ANY_EXTEND:<VWIDE_S>
+	      (vec_select:<VEL>
+		(match_operand:<VCOND> 3 "register_operand" "<vwx>")
+		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
   "TARGET_SIMD"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
@@ -2359,13 +2365,15 @@
 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(mult:<VWIDE>
-	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	    (match_operand:VQ_HSI 1 "register_operand" "w")
-	    (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
-	  (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	    (vec_select:<VEL>
-	      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
-	      (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
+	  (ANY_EXTEND:<VWIDE>
+	    (vec_select:<VHALF>
+	      (match_operand:VQ_HSI 1 "register_operand" "w")
+	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
+	  (vec_duplicate:<VWIDE>
+	    (ANY_EXTEND:<VWIDE_S>
+	      (vec_select:<VEL>
+		(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
+		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
   "TARGET_SIMD"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
@@ -2390,11 +2398,11 @@
 
 (define_insn "aarch64_<su>mull_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (mult:<VWIDE>
-          (ANY_EXTEND:<VWIDE>
-            (match_operand:VD_HSI 1 "register_operand" "w"))
-          (ANY_EXTEND:<VWIDE>
-            (vec_duplicate:<VCOND>
+	(mult:<VWIDE>
+	  (ANY_EXTEND:<VWIDE>
+	    (match_operand:VD_HSI 1 "register_operand" "w"))
+	  (vec_duplicate:<VWIDE>
+	    (ANY_EXTEND:<VWIDE_S>
 	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
   "TARGET_SIMD"
   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
@@ -2404,11 +2412,12 @@
 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(mult:<VWIDE>
-	  (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	    (match_operand:VQ_HSI 1 "register_operand" "w")
-	    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
 	  (ANY_EXTEND:<VWIDE>
-	    (vec_duplicate:<VCOND>
+	    (vec_select:<VHALF>
+	      (match_operand:VQ_HSI 1 "register_operand" "w")
+	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	  (vec_duplicate:<VWIDE>
+	    (ANY_EXTEND:<VWIDE_S>
 	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
   "TARGET_SIMD"
   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
@@ -2435,8 +2444,8 @@
 	  (mult:<VWIDE>
 	    (ANY_EXTEND:<VWIDE>
 	      (match_operand:<VCOND> 2 "register_operand" "w"))
-	    (ANY_EXTEND:<VWIDE>
-	      (vec_duplicate:<VCOND>
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
 		(vec_select:<VEL>
 		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
 		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
@@ -2453,13 +2462,15 @@
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(plus:<VWIDE>
 	  (mult:<VWIDE>
-	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	      (match_operand:VQ_HSI 2 "register_operand" "w")
-	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	      (vec_select:<VEL>
-		(match_operand:<VCOND> 4 "register_operand" "<vwx>")
-		(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(vec_select:<VEL>
+		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   {
@@ -2488,13 +2499,15 @@
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(plus:<VWIDE>
 	  (mult:<VWIDE>
-	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	      (match_operand:VQ_HSI 2 "register_operand" "w")
-	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	      (vec_select:<VEL>
-		(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
-		(parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(vec_select:<VEL>
+		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
   {
@@ -2526,8 +2539,8 @@
      (mult:<VWIDE>
        (ANY_EXTEND:<VWIDE>
 	 (match_operand:<VCOND> 2 "register_operand" "w"))
-       (ANY_EXTEND:<VWIDE>
-	 (vec_duplicate:<VCOND>
+       (vec_duplicate:<VWIDE>
+	 (ANY_EXTEND:<VWIDE_S>
 	   (vec_select:<VEL>
 	     (match_operand:VDQHS 3 "register_operand" "<vwx>")
 	     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
@@ -2544,13 +2557,15 @@
 	(minus:<VWIDE>
 	  (match_operand:<VWIDE> 1 "register_operand" "0")
 	  (mult:<VWIDE>
-	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	      (match_operand:VQ_HSI 2 "register_operand" "w")
-	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	      (vec_select:<VEL>
-		(match_operand:<VCOND> 4 "register_operand" "<vwx>")
-		(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(vec_select:<VEL>
+		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
+		  (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
 	  )))]
   "TARGET_SIMD"
   {
@@ -2580,13 +2595,15 @@
 	(minus:<VWIDE>
 	  (match_operand:<VWIDE> 1 "register_operand" "0")
 	  (mult:<VWIDE>
-	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
-	      (match_operand:VQ_HSI 2 "register_operand" "w")
-	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-	    (ANY_EXTEND:<VWIDE> (vec_duplicate:<VHALF>
-	      (vec_select:<VEL>
-		(match_operand:<VCONQ> 4 "register_operand" "<vwx>")
-		(parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
+	    (ANY_EXTEND:<VWIDE>
+	      (vec_select:<VHALF>
+		(match_operand:VQ_HSI 2 "register_operand" "w")
+		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	    (vec_duplicate:<VWIDE>
+	      (ANY_EXTEND:<VWIDE_S>
+		(vec_select:<VEL>
+		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
+		  (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
 	  )))]
   "TARGET_SIMD"
   {
@@ -5313,12 +5330,12 @@
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
 		(match_operand:VD_HSI 2 "register_operand" "w"))
-	      (sign_extend:<VWIDE>
-		(vec_duplicate:VD_HSI
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (vec_select:<VEL>
 		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+	      ))
 	    (const_int 1))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
@@ -5338,12 +5355,12 @@
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
 		(match_operand:VD_HSI 2 "register_operand" "w"))
-	      (sign_extend:<VWIDE>
-		(vec_duplicate:VD_HSI
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (vec_select:<VEL>
 		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+	      ))
 	    (const_int 1))))]
   "TARGET_SIMD"
   {
@@ -5363,12 +5380,12 @@
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
 		(match_operand:VD_HSI 2 "register_operand" "w"))
-	      (sign_extend:<VWIDE>
-		(vec_duplicate:VD_HSI
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (vec_select:<VEL>
 		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+	      ))
 	    (const_int 1))))]
   "TARGET_SIMD"
   {
@@ -5386,12 +5403,12 @@
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
 		(match_operand:VD_HSI 2 "register_operand" "w"))
-	      (sign_extend:<VWIDE>
-		(vec_duplicate:VD_HSI
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (vec_select:<VEL>
 		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
-              ))
+	      ))
 	    (const_int 1))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
   "TARGET_SIMD"
@@ -5507,8 +5524,8 @@
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
 		      (match_operand:VD_HSI 2 "register_operand" "w"))
-		(sign_extend:<VWIDE>
-		  (vec_duplicate:VD_HSI
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
 	      (const_int 1))))]
   "TARGET_SIMD"
@@ -5523,8 +5540,8 @@
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
 		      (match_operand:VD_HSI 2 "register_operand" "w"))
-		(sign_extend:<VWIDE>
-		  (vec_duplicate:VD_HSI
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
 	      (const_int 1))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
@@ -5601,11 +5618,11 @@
 	  (ss_ashift:<VWIDE>
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-		(sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+		  (vec_select:<VHALF>
+		    (match_operand:VQ_HSI 2 "register_operand" "w")
+		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (vec_select:<VEL>
 		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5622,15 +5639,15 @@
 
 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+	(ss_plus:<VWIDE>
 	  (ss_ashift:<VWIDE>
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-		(sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+		  (vec_select:<VHALF>
+		    (match_operand:VQ_HSI 2 "register_operand" "w")
+		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (vec_select:<VEL>
 		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5648,16 +5665,16 @@
 
 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_minus:<VWIDE>
+	(ss_minus:<VWIDE>
 	  (match_operand:<VWIDE> 1 "register_operand" "0")
 	  (ss_ashift:<VWIDE>
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-		(sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+		  (vec_select:<VHALF>
+		    (match_operand:VQ_HSI 2 "register_operand" "w")
+		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (vec_select:<VEL>
 		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5674,15 +5691,15 @@
 
 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+	(ss_plus:<VWIDE>
 	  (ss_ashift:<VWIDE>
 	      (mult:<VWIDE>
 		(sign_extend:<VWIDE>
-                  (vec_select:<VHALF>
-                    (match_operand:VQ_HSI 2 "register_operand" "w")
-                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
-		(sign_extend:<VWIDE>
-                  (vec_duplicate:<VHALF>
+		  (vec_select:<VHALF>
+		    (match_operand:VQ_HSI 2 "register_operand" "w")
+		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
+		(vec_duplicate:<VWIDE>
+		  (sign_extend:<VWIDE_S>
 		    (vec_select:<VEL>
 		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
 		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
@@ -5734,16 +5751,16 @@
 
 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_minus:<VWIDE>
+	(ss_minus:<VWIDE>
 	  (match_operand:<VWIDE> 1 "register_operand" "0")
 	  (ss_ashift:<VWIDE>
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
-                (vec_select:<VHALF>
-                  (match_operand:VQ_HSI 2 "register_operand" "w")
-                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-	      (sign_extend:<VWIDE>
-                (vec_duplicate:<VHALF>
+		(vec_select:<VHALF>
+		  (match_operand:VQ_HSI 2 "register_operand" "w")
+		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
 	    (const_int 1))))]
   "TARGET_SIMD"
@@ -5753,15 +5770,15 @@
 
 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_plus:<VWIDE>
+	(ss_plus:<VWIDE>
 	  (ss_ashift:<VWIDE>
 	    (mult:<VWIDE>
 	      (sign_extend:<VWIDE>
-                (vec_select:<VHALF>
-                  (match_operand:VQ_HSI 2 "register_operand" "w")
-                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-	      (sign_extend:<VWIDE>
-                (vec_duplicate:<VHALF>
+		(vec_select:<VHALF>
+		  (match_operand:VQ_HSI 2 "register_operand" "w")
+		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+	      (vec_duplicate:<VWIDE>
+		(sign_extend:<VWIDE_S>
 		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
 	    (const_int 1))
 	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
@@ -5806,13 +5823,13 @@
 
 (define_insn "aarch64_sqdmull_lane<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (match_operand:VD_HSI 1 "register_operand" "w"))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (vec_select:<VEL>
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (vec_select:<VEL>
 		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
@@ -5827,13 +5844,13 @@
 
 (define_insn "aarch64_sqdmull_laneq<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (match_operand:VD_HSI 1 "register_operand" "w"))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (vec_select:<VEL>
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (vec_select:<VEL>
 		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
@@ -5890,13 +5907,13 @@
 
 (define_insn "aarch64_sqdmull_n<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (match_operand:VD_HSI 1 "register_operand" "w"))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:VD_HSI
-                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
 	       )
 	     (const_int 1)))]
   "TARGET_SIMD"
@@ -5906,8 +5923,6 @@
 
 ;; vqdmull2
 
-
-
 (define_insn "aarch64_sqdmull2<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
         (ss_ashift:<VWIDE>
@@ -5943,15 +5958,15 @@
 
 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (vec_select:<VEL>
+		   (match_operand:VQ_HSI 1 "register_operand" "w")
+		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (vec_select:<VEL>
 		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
@@ -5966,15 +5981,15 @@
 
 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (vec_select:<VEL>
+		   (match_operand:VQ_HSI 1 "register_operand" "w")
+		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (vec_select:<VEL>
 		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
 		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 	       ))
@@ -6019,15 +6034,15 @@
 
 (define_insn "aarch64_sqdmull2_n<mode>_internal"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ss_ashift:<VWIDE>
+	(ss_ashift:<VWIDE>
 	     (mult:<VWIDE>
 	       (sign_extend:<VWIDE>
 		 (vec_select:<VHALF>
-                   (match_operand:VQ_HSI 1 "register_operand" "w")
-                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
-	       (sign_extend:<VWIDE>
-                 (vec_duplicate:<VHALF>
-                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
+		   (match_operand:VQ_HSI 1 "register_operand" "w")
+		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
+	       (vec_duplicate:<VWIDE>
+		 (sign_extend:<VWIDE_S>
+		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
 	       )
 	     (const_int 1)))]
   "TARGET_SIMD"
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index fd306bf..a719f57 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1717,22 +1717,35 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode,
       && vec_duplicate_p (op, &elt)
       && code != VEC_DUPLICATE)
     {
-      /* Try applying the operator to ELT and see if that simplifies.
-	 We can duplicate the result if so.
+      if (code == SIGN_EXTEND || code == ZERO_EXTEND)
+	/* Enforce a canonical order of VEC_DUPLICATE wrt other unary
+	   operations by promoting VEC_DUPLICATE to the root of the expression
+	   (as far as possible).  */
+	temp = simplify_gen_unary (code, GET_MODE_INNER (mode),
+				   elt, GET_MODE_INNER (GET_MODE (op)));
+      else
+	/* Try applying the operator to ELT and see if that simplifies.
+	   We can duplicate the result if so.
 
-	 The reason we don't use simplify_gen_unary is that it isn't
-	 necessarily a win to convert things like:
+	   The reason we traditionally haven't used simplify_gen_unary
+	   for these codes is that it didn't necessarily seem to be a
+	   win to convert things like:
 
-	   (neg:V (vec_duplicate:V (reg:S R)))
+	     (neg:V (vec_duplicate:V (reg:S R)))
 
-	 to:
+	   to:
 
-	   (vec_duplicate:V (neg:S (reg:S R)))
+	     (vec_duplicate:V (neg:S (reg:S R)))
 
-	 The first might be done entirely in vector registers while the
-	 second might need a move between register files.  */
-      temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
-				       elt, GET_MODE_INNER (GET_MODE (op)));
+	   The first might be done entirely in vector registers while the
+	   second might need a move between register files.
+
+	   However, there are also cases where promoting the vec_duplicate is
+	   more efficient, and there is definite value in having a canonical
+	   form when matching instruction patterns.  We should consider
+	   extending the simplify_gen_unary code above to more cases.  */
+	temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
+					 elt, GET_MODE_INNER (GET_MODE (op)));
       if (temp)
 	return gen_vec_duplicate (mode, temp);
     }
author	Jonathan Wright <jonathan.wright@arm.com>	2021-07-16 15:34:38 +0100
committer	Jonathan Wright <jonathan.wright@arm.com>	2021-07-27 10:42:33 +0100
commit	3bc9db6a989671bedf19e61bd1b21f79588e99da (patch)
tree	d658c0f489f3895ab00b4a1351a40394b55306c1 /gcc
parent	d88a6951586c7229b25708f4486eaaf4bf4b5bbe (diff)
download	gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.zip gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.gz gcc-3bc9db6a989671bedf19e61bd1b21f79588e99da.tar.bz2