Vect: Add support for dot-product where the sign for the multiplicant changes.

This patch adds support for a dot product where the sign of the multiplication arguments differ. i.e. one is signed and one is unsigned but the precisions are the same. #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } The operations are performed as if the operands were extended to a 32-bit value. As such this operation isn't valid if there is an intermediate conversion to an unsigned value. i.e. if SIGNEDNESS_2 is unsigned. more over if the signs of SIGNEDNESS_3 and SIGNEDNESS_4 are flipped the same optab is used but the operands are flipped in the optab expansion. To support this the patch extends the dot-product detection to optionally ignore operands with different signs and stores this information in the optab subtype which is now made a bitfield. The subtype can now additionally controls which optab an EXPR can expand to. gcc/ChangeLog: * optabs.def (usdot_prod_optab): New. * doc/md.texi: Document it and clarify other dot prod optabs. * optabs-tree.h (enum optab_subtype): Add optab_vector_mixed_sign. * optabs-tree.c (optab_for_tree_code): Support usdot_prod_optab. * optabs.c (expand_widen_pattern_expr): Likewise. * tree-cfg.c (verify_gimple_assign_ternary): Likewise. * tree-vect-loop.c (vectorizable_reduction): Query dot-product kind. * tree-vect-patterns.c (vect_supportable_direct_optab_p): Take optional optab subtype. (vect_widened_op_tree): Optionally ignore mismatch types. (vect_recog_dot_prod_pattern): Support usdot_prod_optab.
author: Tamar Christina <tamar.christina@arm.com> 2021-07-14 14:54:26 +0100
committer: Tamar Christina <tamar.christina@arm.com> 2021-07-14 14:54:26 +0100
commit: ab0a6b213abf6843b59cdea6399030e828109551 (patch)
tree: a351721ec47290ef4a9aed4819bf722fbf31cb98 /gcc/tree-vect-patterns.c
parent: cc11b924bfe7752edbba052ca71653f46a60887a (diff)
download: gcc-ab0a6b213abf6843b59cdea6399030e828109551.zip
gcc-ab0a6b213abf6843b59cdea6399030e828109551.tar.gz
gcc-ab0a6b213abf6843b59cdea6399030e828109551.tar.bz2
1 files changed, 65 insertions, 21 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index b2e7fc2..71533e6 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -191,9 +191,9 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
 }
 
 /* Return true if the target supports a vector version of CODE,
-   where CODE is known to map to a direct optab.  ITYPE specifies
-   the type of (some of) the scalar inputs and OTYPE specifies the
-   type of the scalar result.
+   where CODE is known to map to a direct optab with the given SUBTYPE.
+   ITYPE specifies the type of (some of) the scalar inputs and OTYPE
+   specifies the type of the scalar result.
 
    If CODE allows the inputs and outputs to have different type
    (such as for WIDEN_SUM_EXPR), it is the input mode rather
@@ -208,7 +208,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
 static bool
 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
 				 tree itype, tree *vecotype_out,
-				 tree *vecitype_out = NULL)
+				 tree *vecitype_out = NULL,
+				 enum optab_subtype subtype = optab_default)
 {
   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
   if (!vecitype)
@@ -218,7 +219,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
   if (!vecotype)
     return false;
 
-  optab optab = optab_for_tree_code (code, vecitype, optab_default);
+  optab optab = optab_for_tree_code (code, vecitype, subtype);
   if (!optab)
     return false;
 
@@ -521,6 +522,7 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type)
   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
 				TYPE_PRECISION (new_type));
   precision *= 2;
+
   if (precision * 2 > TYPE_PRECISION (type))
     return false;
 
@@ -539,6 +541,10 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type)
    to a type that (a) is narrower than the result of STMT_INFO and
    (b) can hold all leaf operand values.
 
+   If SUBTYPE then allow that the signs of the operands
+   may differ in signs but not in precision.  SUBTYPE is updated to reflect
+   this.
+
    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
    exists.  */
 
@@ -546,7 +552,8 @@ static unsigned int
 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
 		      tree_code widened_code, bool shift_p,
 		      unsigned int max_nops,
-		      vect_unpromoted_value *unprom, tree *common_type)
+		      vect_unpromoted_value *unprom, tree *common_type,
+		      enum optab_subtype *subtype = NULL)
 {
   /* Check for an integer operation with the right code.  */
   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
@@ -607,7 +614,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
 		= vinfo->lookup_def (this_unprom->op);
 	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
 					   widened_code, shift_p, max_nops,
-					   this_unprom, common_type);
+					   this_unprom, common_type,
+					   subtype);
 	      if (nops == 0)
 		return 0;
 
@@ -625,7 +633,18 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
 		*common_type = this_unprom->type;
 	      else if (!vect_joust_widened_type (type, this_unprom->type,
 						 common_type))
-		return 0;
+		{
+		  if (subtype)
+		    {
+		      /* See if we can sign extend the smaller type.  */
+		      if (TYPE_PRECISION (this_unprom->type)
+			  > TYPE_PRECISION (*common_type))
+			*common_type = this_unprom->type;
+		      *subtype = optab_vector_mixed_sign;
+		    }
+		  else
+		    return 0;
+		}
 	    }
 	}
       next_op += nops;
@@ -725,12 +744,22 @@ vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
 
 /* Convert UNPROM to TYPE and return the result, adding new statements
    to STMT_INFO's pattern definition statements if no better way is
-   available.  VECTYPE is the vector form of TYPE.  */
+   available.  VECTYPE is the vector form of TYPE.
+
+   If SUBTYPE then convert the type based on the subtype.  */
 
 static tree
 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
-		    vect_unpromoted_value *unprom, tree vectype)
+		    vect_unpromoted_value *unprom, tree vectype,
+		    enum optab_subtype subtype = optab_default)
 {
+
+  /* Update the type if the signs differ.  */
+  if (subtype == optab_vector_mixed_sign
+      && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
+    type = build_nonstandard_integer_type (TYPE_PRECISION (type),
+					   TYPE_SIGN (unprom->type));
+
   /* Check for a no-op conversion.  */
   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
     return unprom->op;
@@ -806,12 +835,14 @@ vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
 }
 
 /* Invoke vect_convert_input for N elements of UNPROM and store the
-   result in the corresponding elements of RESULT.  */
+   result in the corresponding elements of RESULT.
+
+   If SUBTYPE then convert the type based on the subtype.  */
 
 static void
 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
 		     tree *result, tree type, vect_unpromoted_value *unprom,
-		     tree vectype)
+		     tree vectype, enum optab_subtype subtype = optab_default)
 {
   for (unsigned int i = 0; i < n; ++i)
     {
@@ -819,11 +850,12 @@ vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
       for (j = 0; j < i; ++j)
 	if (unprom[j].op == unprom[i].op)
 	  break;
+
       if (j < i)
 	result[i] = result[j];
       else
 	result[i] = vect_convert_input (vinfo, stmt_info,
-					type, &unprom[i], vectype);
+					type, &unprom[i], vectype, subtype);
     }
 }
 
@@ -895,7 +927,8 @@ vect_reassociating_reduction_p (vec_info *vinfo,
 
    Try to find the following pattern:
 
-     type x_t, y_t;
+     type1a x_t
+     type1b y_t;
      TYPE1 prod;
      TYPE2 sum = init;
    loop:
@@ -908,9 +941,9 @@ vect_reassociating_reduction_p (vec_info *vinfo,
      [S6  prod = (TYPE2) prod;  #optional]
      S7  sum_1 = prod + sum_0;
 
-   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
-   same size of 'TYPE1' or bigger. This is a special case of a reduction
-   computation.
+   where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
+   the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
+   'type1a' and 'type1b' can differ.
 
    Input:
 
@@ -953,7 +986,8 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
      In which
      - DX is double the size of X
      - DY is double the size of Y
-     - DX, DY, DPROD all have the same type
+     - DX, DY, DPROD all have the same type but the sign
+       between X, Y and DPROD can differ.
      - sum is the same size of DPROD or bigger
      - sum has been recognized as a reduction variable.
 
@@ -991,8 +1025,18 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   vect_unpromoted_value unprom0[2];
+  enum optab_subtype subtype = optab_vector;
   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
-			     false, 2, unprom0, &half_type))
+			     false, 2, unprom0, &half_type, &subtype))
+    return NULL;
+
+  /* If there are two widening operations, make sure they agree on the sign
+     of the extension.  The result of an optab_vector_mixed_sign operation
+     is signed; otherwise, the result has the same sign as the operands.  */
+  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
+      && (subtype == optab_vector_mixed_sign
+	? TYPE_UNSIGNED (unprom_mult.type)
+	: TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
     return NULL;
 
   /* If there are two widening operations, make sure they agree on
@@ -1005,13 +1049,13 @@ vect_recog_dot_prod_pattern (vec_info *vinfo,
 
   tree half_vectype;
   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
-					type_out, &half_vectype))
+					type_out, &half_vectype, subtype))
     return NULL;
 
   /* Get the inputs in the appropriate types.  */
   tree mult_oprnd[2];
   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
-		       unprom0, half_vectype);
+		       unprom0, half_vectype, subtype);
 
   var = vect_recog_temp_ssa_var (type, NULL);
   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
author	Tamar Christina <tamar.christina@arm.com>	2021-07-14 14:54:26 +0100
committer	Tamar Christina <tamar.christina@arm.com>	2021-07-14 14:54:26 +0100
commit	ab0a6b213abf6843b59cdea6399030e828109551 (patch)
tree	a351721ec47290ef4a9aed4819bf722fbf31cb98 /gcc/tree-vect-patterns.c
parent	cc11b924bfe7752edbba052ca71653f46a60887a (diff)
download	gcc-ab0a6b213abf6843b59cdea6399030e828109551.zip gcc-ab0a6b213abf6843b59cdea6399030e828109551.tar.gz gcc-ab0a6b213abf6843b59cdea6399030e828109551.tar.bz2