author     Richard Sandiford <richard.sandiford@arm.com>   2016-11-15 17:57:53 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>        2016-11-15 17:57:53 +0000
commit     ba3c3dc0702eca06bde4841e3bbdf0d428b71968 (patch)
tree       51c0c3fdc5c0927ce9f1865ae0afe38fadb0b018
parent     89e64bc0305f36a4abe486ce07cbee7c13a6dc81 (diff)
Fix simplify_shift_const_1 handling of vector shifts
simplify_shift_const_1 handles both shifts of scalars by scalars and
shifts of vectors by scalars.  For vectors this means that each element
is shifted by the same amount.

However:

(a) the two cases weren't always distinguished, so we'd try things for
    vectors that only made sense for scalars.

(b) a lot of the range and bitcount checks were based on the bitsize
    or precision of the full shifted operand, rather than the mode of
    each element.

Fixing (b) accidentally exposed more optimisation opportunities,
although that wasn't the point of the patch.

gcc/
2016-11-15  Richard Sandiford  <richard.sandiford@arm.com>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

	* combine.c (simplify_shift_const_1): Use the number of bits
	in the inner mode to determine the range of the shift.
	When handling shifts of vectors, skip any rules that apply
	only to scalars.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r242442
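To see why the unit-based measures matter, consider V4SImode, a vector of
four 32-bit integers: GET_MODE_BITSIZE is 128 (the whole vector), while
GET_MODE_UNIT_BITSIZE is 32 (one element), so a range check based on the
full operand accepts per-element shift counts that are far too large.
Below is a minimal standalone sketch of the corrected range check; the
constants stand in for GCC's machmode.h accessors, and the count of 40 is
an arbitrary illustrative value, not taken from the patch.

/* Standalone model, not GCC's real machmode.h API.  */
#include <stdio.h>

#define VECTOR_BITSIZE 128  /* stands in for GET_MODE_BITSIZE (V4SImode)      */
#define UNIT_BITSIZE    32  /* stands in for GET_MODE_UNIT_BITSIZE (V4SImode) */

int
main (void)
{
  int orig_count = 40;  /* shift applied to every 32-bit element */

  /* Old check: bounded by the full vector width, so 40 looks valid.  */
  int old_ok = orig_count >= 0 && orig_count < VECTOR_BITSIZE;

  /* New check: bounded by the element (inner-mode) width; 40 is out of
     range for a 32-bit element, so the simplifier must give up.  */
  int new_ok = orig_count >= 0 && orig_count < UNIT_BITSIZE;

  printf ("count %d: old check %s, new check %s\n", orig_count,
	  old_ok ? "accepts" : "rejects",
	  new_ok ? "accepts" : "rejects");
  return 0;
}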
-rw-r--r--  gcc/ChangeLog    9
-rw-r--r--  gcc/combine.c  143
2 files changed, 98 insertions(+), 54 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 95e3dc5..dee6da0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -2,6 +2,15 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * combine.c (simplify_shift_const_1): Use the number of bits
+ in the inner mode to determine the range of the shift.
+ When handling shifts of vectors, skip any rules that apply
+ only to scalars.
+
+2016-11-15 Richard Sandiford <richard.sandiford@arm.com>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* rtlanal.c (num_sign_bit_copies1): Calculate bitwidth after
handling VOIDmode.
diff --git a/gcc/combine.c b/gcc/combine.c
index 2de2a3e..3f197fa 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -10228,12 +10228,12 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
want to do this inside the loop as it makes it more difficult to
combine shifts. */
if (SHIFT_COUNT_TRUNCATED)
- orig_count &= GET_MODE_BITSIZE (mode) - 1;
+ orig_count &= GET_MODE_UNIT_BITSIZE (mode) - 1;
/* If we were given an invalid count, don't do anything except exactly
what was requested. */
- if (orig_count < 0 || orig_count >= (int) GET_MODE_PRECISION (mode))
+ if (orig_count < 0 || orig_count >= (int) GET_MODE_UNIT_PRECISION (mode))
return NULL_RTX;
count = orig_count;
@@ -10250,16 +10250,14 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
/* Convert ROTATERT to ROTATE. */
if (code == ROTATERT)
{
- unsigned int bitsize = GET_MODE_PRECISION (result_mode);
+ unsigned int bitsize = GET_MODE_UNIT_PRECISION (result_mode);
code = ROTATE;
- if (VECTOR_MODE_P (result_mode))
- count = bitsize / GET_MODE_NUNITS (result_mode) - count;
- else
- count = bitsize - count;
+ count = bitsize - count;
}
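/* As an illustration (not part of the patch): with 32-bit elements,
   (rotatert X 3) equals (rotate X 29), since 32 - 3 = 29.
   GET_MODE_UNIT_PRECISION is 32 both for SImode itself and for each
   lane of V4SImode, so the separate vector computation
   (bitsize / nunits) is no longer needed.  */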
shift_mode = try_widen_shift_mode (code, varop, count, result_mode,
mode, outer_op, outer_const);
+ machine_mode shift_unit_mode = GET_MODE_INNER (shift_mode);
/* Handle cases where the count is greater than the size of the mode
minus 1. For ASHIFT, use the size minus one as the count (this can
@@ -10271,12 +10269,12 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
multiple operations, each of which are defined, we know what the
result is supposed to be. */
- if (count > (GET_MODE_PRECISION (shift_mode) - 1))
+ if (count > (GET_MODE_PRECISION (shift_unit_mode) - 1))
{
if (code == ASHIFTRT)
- count = GET_MODE_PRECISION (shift_mode) - 1;
+ count = GET_MODE_PRECISION (shift_unit_mode) - 1;
else if (code == ROTATE || code == ROTATERT)
- count %= GET_MODE_PRECISION (shift_mode);
+ count %= GET_MODE_PRECISION (shift_unit_mode);
else
{
/* We can't simply return zero because there may be an
@@ -10292,44 +10290,49 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
if (complement_p)
break;
- /* An arithmetic right shift of a quantity known to be -1 or 0
- is a no-op. */
- if (code == ASHIFTRT
- && (num_sign_bit_copies (varop, shift_mode)
- == GET_MODE_PRECISION (shift_mode)))
+ if (shift_mode == shift_unit_mode)
{
- count = 0;
- break;
- }
+ /* An arithmetic right shift of a quantity known to be -1 or 0
+ is a no-op. */
+ if (code == ASHIFTRT
+ && (num_sign_bit_copies (varop, shift_unit_mode)
+ == GET_MODE_PRECISION (shift_unit_mode)))
+ {
+ count = 0;
+ break;
+ }
- /* If we are doing an arithmetic right shift and discarding all but
- the sign bit copies, this is equivalent to doing a shift by the
- bitsize minus one. Convert it into that shift because it will often
- allow other simplifications. */
-
- if (code == ASHIFTRT
- && (count + num_sign_bit_copies (varop, shift_mode)
- >= GET_MODE_PRECISION (shift_mode)))
- count = GET_MODE_PRECISION (shift_mode) - 1;
-
- /* We simplify the tests below and elsewhere by converting
- ASHIFTRT to LSHIFTRT if we know the sign bit is clear.
- `make_compound_operation' will convert it to an ASHIFTRT for
- those machines (such as VAX) that don't have an LSHIFTRT. */
- if (code == ASHIFTRT
- && val_signbit_known_clear_p (shift_mode,
- nonzero_bits (varop, shift_mode)))
- code = LSHIFTRT;
-
- if (((code == LSHIFTRT
- && HWI_COMPUTABLE_MODE_P (shift_mode)
- && !(nonzero_bits (varop, shift_mode) >> count))
- || (code == ASHIFT
- && HWI_COMPUTABLE_MODE_P (shift_mode)
- && !((nonzero_bits (varop, shift_mode) << count)
- & GET_MODE_MASK (shift_mode))))
- && !side_effects_p (varop))
- varop = const0_rtx;
+ /* If we are doing an arithmetic right shift and discarding all but
+ the sign bit copies, this is equivalent to doing a shift by the
+ bitsize minus one. Convert it into that shift because it will
+ often allow other simplifications. */
+
+ if (code == ASHIFTRT
+ && (count + num_sign_bit_copies (varop, shift_unit_mode)
+ >= GET_MODE_PRECISION (shift_unit_mode)))
+ count = GET_MODE_PRECISION (shift_unit_mode) - 1;
+
+ /* We simplify the tests below and elsewhere by converting
+ ASHIFTRT to LSHIFTRT if we know the sign bit is clear.
+ `make_compound_operation' will convert it to an ASHIFTRT for
+ those machines (such as VAX) that don't have an LSHIFTRT. */
+ if (code == ASHIFTRT
+ && HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+ && val_signbit_known_clear_p (shift_unit_mode,
+ nonzero_bits (varop,
+ shift_unit_mode)))
+ code = LSHIFTRT;
+
+ if (((code == LSHIFTRT
+ && HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+ && !(nonzero_bits (varop, shift_unit_mode) >> count))
+ || (code == ASHIFT
+ && HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+ && !((nonzero_bits (varop, shift_unit_mode) << count)
+ & GET_MODE_MASK (shift_unit_mode))))
+ && !side_effects_p (varop))
+ varop = const0_rtx;
+ }
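/* A hand-checked instance of the first rule in the block above
   (illustrative only): if VAROP is SImode and num_sign_bit_copies
   reports 32, every bit of VAROP equals its sign bit, so VAROP is
   either 0 or -1.  Shifting 0 or -1 arithmetically right by any
   in-range count returns the value unchanged, so the count can be
   set to 0 and the shift dropped.  */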
switch (GET_CODE (varop))
{
@@ -10346,6 +10349,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case MEM:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* If we have (xshiftrt (mem ...) C) and C is MODE_WIDTH
minus the width of a smaller mode, we can do this with a
SIGN_EXTEND or ZERO_EXTEND from the narrower memory location. */
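/* An illustrative instance of the rule above (endianness and the
   exact narrow address glossed over): on a 32-bit target,
   (lshiftrt (mem:SI A) 24) keeps only the high byte of the load,
   so it can become (zero_extend:SI (mem:QI A')) where A' is the
   address of that byte; A' is hypothetical notation here, not GCC
   syntax.  */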
@@ -10368,6 +10375,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case SUBREG:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* If VAROP is a SUBREG, strip it as long as the inner operand has
the same number of words as what we've seen so far. Then store
the widest mode in MODE. */
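/* As an illustration (not from the patch): on a 64-bit,
   little-endian target, (subreg:SI (reg:DI R) 0) occupies the same
   single word as the inner register, so the SUBREG can be stripped,
   the shift analysed on the DImode value, and MODE widened to
   DImode.  */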
@@ -10424,9 +10435,9 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
interpreted as the sign bit in a narrower mode, so, if
the result is narrower, don't discard the shift. */
if (code == LSHIFTRT
- && count == (GET_MODE_BITSIZE (result_mode) - 1)
- && (GET_MODE_BITSIZE (result_mode)
- >= GET_MODE_BITSIZE (GET_MODE (varop))))
+ && count == (GET_MODE_UNIT_BITSIZE (result_mode) - 1)
+ && (GET_MODE_UNIT_BITSIZE (result_mode)
+ >= GET_MODE_UNIT_BITSIZE (GET_MODE (varop))))
{
varop = XEXP (varop, 0);
continue;
@@ -10437,14 +10448,17 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
case LSHIFTRT:
case ASHIFT:
case ROTATE:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* Here we have two nested shifts. The result is usually the
AND of a new shift with a mask. We compute the result below. */
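/* A worked instance of the nested-shift combination (illustrative
   only): in SImode, (lshiftrt (ashift X 4) 2) is a net left shift
   by 2 that also clears the two topmost result bits, i.e. it
   equals (and (ashift X 2) (const_int 0x3ffffffc)).  */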
if (CONST_INT_P (XEXP (varop, 1))
&& INTVAL (XEXP (varop, 1)) >= 0
&& INTVAL (XEXP (varop, 1)) < GET_MODE_PRECISION (GET_MODE (varop))
&& HWI_COMPUTABLE_MODE_P (result_mode)
- && HWI_COMPUTABLE_MODE_P (mode)
- && !VECTOR_MODE_P (result_mode))
+ && HWI_COMPUTABLE_MODE_P (mode))
{
enum rtx_code first_code = GET_CODE (varop);
unsigned int first_count = INTVAL (XEXP (varop, 1));
@@ -10610,7 +10624,8 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case NOT:
- if (VECTOR_MODE_P (mode))
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
break;
/* Make this fit the case below. */
@@ -10620,6 +10635,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
case IOR:
case AND:
case XOR:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* If we have (xshiftrt (ior (plus X (const_int -1)) X) C)
with C the size of VAROP - 1 and the shift is logical if
STORE_FLAG_VALUE is 1 and arithmetic if STORE_FLAG_VALUE is -1,
@@ -10696,6 +10715,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case EQ:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* Convert (lshiftrt (eq FOO 0) C) to (xor FOO 1) if STORE_FLAG_VALUE
says that the sign bit can be tested, FOO has mode MODE, C is
GET_MODE_PRECISION (MODE) - 1, and FOO has only its low-order bit
@@ -10717,6 +10740,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case NEG:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* (lshiftrt (neg A) C) where A is either 0 or 1 and C is one less
than the number of bits in the mode is equivalent to A. */
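/* Checking the rule above by hand (illustrative): in SImode with
   C = 31, A = 0 gives (neg 0) = 0, and 0 >> 31 = 0 = A; A = 1
   gives (neg 1) = -1 (all bits set), and a logical right shift by
   31 leaves just the top bit, i.e. 1 = A.  */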
if (code == LSHIFTRT
@@ -10740,6 +10767,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case PLUS:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* (lshiftrt (plus A -1) C) where A is either 0 or 1 and C
is one less than the number of bits in the mode is
equivalent to (xor A 1). */
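/* Checking the rule above by hand (illustrative): in SImode with
   C = 31, A = 0 gives (plus 0 -1) = -1, whose logical right shift
   by 31 is 1 = (xor 0 1); A = 1 gives (plus 1 -1) = 0, which
   shifts to 0 = (xor 1 1).  */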
@@ -10821,6 +10852,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
break;
case MINUS:
+ /* The following rules apply only to scalars. */
+ if (shift_mode != shift_unit_mode)
+ break;
+
/* If we have (xshiftrt (minus (ashiftrt X C)) X) C)
with C the size of VAROP - 1 and the shift is logical if
STORE_FLAG_VALUE is 1 and arithmetic if STORE_FLAG_VALUE is -1,
@@ -10854,8 +10889,8 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
&& GET_CODE (XEXP (varop, 0)) == LSHIFTRT
&& CONST_INT_P (XEXP (XEXP (varop, 0), 1))
&& (INTVAL (XEXP (XEXP (varop, 0), 1))
- >= (GET_MODE_PRECISION (GET_MODE (XEXP (varop, 0)))
- - GET_MODE_PRECISION (GET_MODE (varop)))))
+ >= (GET_MODE_UNIT_PRECISION (GET_MODE (XEXP (varop, 0)))
+ - GET_MODE_UNIT_PRECISION (GET_MODE (varop)))))
{
rtx varop_inner = XEXP (varop, 0);