author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>     2017-06-29 09:21:57 +0000
committer  Michael Collison <collison@gcc.gnu.org>     2017-06-29 09:21:57 +0000
commit     7813b280435f2e19c53df9f8b04a3d28bb561aa8
tree       98563d3d1aa4ffabc13c0d3178f3f9ad5dfe9233 /gcc/config
parent     696bafe61327dbf2ce2dee7262af8f18b23fcfa1
re PR target/70119 (AArch64 should take advantage of implicit truncation of variable shift amount without defining SHIFT_COUNT_TRUNCATED)
2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Michael Collison  <michael.collison@arm.com>

        PR target/70119
        * config/aarch64/aarch64.md (*aarch64_<optab>_reg_<mode>3_mask1):
        New pattern.
        (*aarch64_reg_<mode>3_neg_mask2): New pattern.
        (*aarch64_reg_<mode>3_minus_mask): New pattern.
        (*aarch64_<optab>_reg_di3_mask2): New pattern.
        * config/aarch64/aarch64.c (aarch64_rtx_costs): Account for cost
        of shift when the shift amount is masked with constant equal to
        the size of the mode.
        * config/aarch64/predicates.md (subreg_lowpart_operator): New
        predicate.

2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Michael Collison  <michael.collison@arm.com>

        PR target/70119
        * gcc.target/aarch64/var_shift_mask_1.c: New test.
Co-Authored-By: Michael Collison <michael.collison@arm.com>
From-SVN: r249774
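For context, the redundant-masking idiom this patch lets GCC optimise away looks like the following C. This is an illustrative sketch only: the function name is invented here, and it is not necessarily the exact contents of the new var_shift_mask_1.c test.

/* Illustrative example, not taken from the testsuite.  */
unsigned int
shift_masked (unsigned int x, unsigned int n)
{
  /* LSL on W-registers already truncates the shift amount to its low
     5 bits, so the "& 31" is redundant on AArch64; with these patterns
     the expected output is a single variable shift with no AND.  */
  return x << (n & 31);
}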
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64.c     | 50
-rw-r--r--  gcc/config/aarch64/aarch64.md    | 91
-rw-r--r--  gcc/config/aarch64/predicates.md |  4
3 files changed, 131 insertions(+), 14 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 593263f..5cf41fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7541,17 +7541,26 @@ cost_plus:
         }
       else
         {
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
-                {
-                  /* Vector shift (register).  */
-                  *cost += extra_cost->vect.alu;
-                }
-              else
+              if (speed)
+                /* Vector shift (register).  */
+                *cost += extra_cost->vect.alu;
+            }
+          else
+            {
+              if (speed)
+                /* LSLV.  */
+                *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
                 {
-                  /* LSLV.  */
-                  *cost += extra_cost->alu.shift_reg;
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
                 }
             }
           return false;  /* All arguments need to be in registers.  */
@@ -7580,14 +7589,27 @@ cost_plus:
         }
       else
         {
-
-          /* ASR (register) and friends.  */
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
+              if (speed)
+                /* Vector shift (register).  */
                 *cost += extra_cost->vect.alu;
-              else
+            }
+          else
+            {
+              if (speed)
+                /* ASR (register) and friends.  */
                 *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+                {
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
+                }
             }
           return false;  /* All arguments need to be in registers.  */
         }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6bdbf65..e6e7e64 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3942,6 +3942,97 @@
   }
 )
 
+;; When the LSL, LSR, ASR, ROR instructions operate on all register arguments
+;; they truncate the shift/rotate amount by the size of the registers they
+;; operate on: 32 for W-regs, 64 for X-regs.  This allows us to optimise away
+;; such redundant masking instructions.  GCC can do that automatically when
+;; SHIFT_COUNT_TRUNCATED is true, but we can't enable it for TARGET_SIMD
+;; because some of the SISD shift alternatives don't perform this truncations.
+;; So this pattern exists to catch such cases.
+
+(define_insn "*aarch64_<optab>_reg_<mode>3_mask1"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:GPI (match_operand:GPI 2 "register_operand" "r")
+                     (match_operand 3 "const_int_operand" "n"))])))]
+  "(~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0"
+  "<shift>\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "shift_reg")]
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+          [(neg:SI (and:SI (match_operand:SI 2 "register_operand" "r")
+                           (match_operand 3 "const_int_operand" "n")))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+    emit_insn (gen_negsi2 (tmp, operands[2]));
+
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op,
+                                     SUBREG_BYTE (operands[4]));
+    emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (ashift:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (minus:QI (match_operand 2 "const_int_operand" "n")
+                    (match_operator 5 "subreg_lowpart_operator"
+                    [(and:SI (match_operand:SI 3 "register_operand" "r")
+                             (match_operand 4 "const_int_operand" "n"))]))))]
+  "((~INTVAL (operands[4]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)
+   && INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+
+    emit_insn (gen_negsi2 (tmp, operands[3]));
+
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op,
+                                     SUBREG_BYTE (operands[5]));
+
+    emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn "*aarch64_<optab>_reg_di3_mask2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (SHIFT:DI
+          (match_operand:DI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:SI (match_operand:SI 2 "register_operand" "r")
+                    (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)"
+{
+  rtx xop[3];
+  xop[0] = operands[0];
+  xop[1] = operands[1];
+  xop[2] = gen_lowpart (GET_MODE (operands[4]), operands[2]);
+  output_asm_insn ("<shift>\t%x0, %x1, %x2", xop);
+  return "";
+}
+  [(set_attr "type" "shift_reg")]
+)
+
 ;; Logical left shift using SISD or Integer instruction
 (define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index cd7ded9..ad8a43c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -35,6 +35,10 @@
   (and (match_code "const_int")
        (match_test "op == CONST0_RTX (mode)")))
 
+(define_special_predicate "subreg_lowpart_operator"
+  (and (match_code "subreg")
+       (match_test "subreg_lowpart_p (op)")))
+
 (define_predicate "aarch64_ccmp_immediate"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), -31, 31)")))
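The *aarch64_<optab>_reg_di3_mask2 pattern covers the 64-bit variant of the same idiom, where a 32-bit shift amount is masked with 63 and used, via a lowpart subreg, to shift an X-register. A hedged sketch of the kind of source that should now compile to a single shift (illustrative only, the function name is invented; it assumes combine forms the masked-subreg shift amount the pattern looks for):

/* Illustrative example, not taken from the testsuite.  */
unsigned long long
shift64_masked (unsigned long long x, int n)
{
  /* X-register shifts truncate the amount to its low 6 bits, so the
     "& 63" (and the widening of n) should fold away, leaving a single
     variable shift such as "lsl x0, x0, x1".  */
  return x << (n & 63);
}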