Vect: Support new IFN SAT_ADD for unsigned vector int

For vectorize, we leverage the existing vect pattern recog to find the pattern similar to scalar and let the vectorizer to perform the rest part for standard name usadd<mode>3 in vector mode. The riscv vector backend have insn "Vector Single-Width Saturating Add and Subtract" which can be leveraged when expand the usadd<mode>3 in vector mode. For example: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { unsigned i; for (i = 0; i < n; i++) out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i])); } Before this patch: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { ... _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]); ivtmp_58 = _80 * 8; vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0); vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0); vect__7.11_66 = vect__4.7_61 + vect__6.10_65; mask__8.12_67 = vect__4.7_61 > vect__7.11_66; vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, vect__7.11_66); .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72); vectp_x.5_60 = vectp_x.5_59 + ivtmp_58; vectp_y.8_64 = vectp_y.8_63 + ivtmp_58; vectp_out.16_75 = vectp_out.16_74 + ivtmp_58; ivtmp_79 = ivtmp_78 - _80; ... } After this patch: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { ... _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]); ivtmp_46 = _62 * 8; vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0); vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0); vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53); .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54); ... } The below test suites are passed for this patch. * The riscv fully regression tests. * The x86 bootstrap tests. * The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func decl generated by match.pd match. (vect_recog_sat_add_pattern): New func impl to recog the pattern for unsigned SAT_ADD. Signed-off-by: Pan Li <pan2.li@intel.com>
author: Pan Li <pan2.li@intel.com> 2024-05-15 10:14:06 +0800
committer: Pan Li <pan2.li@intel.com> 2024-05-16 20:09:30 +0800
commit: d4dee347b3fe1982bab26485ff31cd039c9df010 (patch)
tree: 4ab9e18ffa8bf7c750e77dda2706e6a8c43713cb
parent: 52b0536710ff3f3ace72ab00ce9ef6c630cd1183 (diff)
download: gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.zip
gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.tar.gz
gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.tar.bz2
1 files changed, 52 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dfb7d80..a313dc6 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,57 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *
+ * And then simplied to
+ *   _12 = .SAT_ADD (_4, _6);
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			    tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  tree res_ops[2];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+    {
+      tree itype = TREE_TYPE (res_ops[0]);
+      tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+      if (vtype != NULL_TREE
+	&& direct_internal_fn_supported_p (IFN_SAT_ADD, vtype,
+					   OPTIMIZE_FOR_BOTH))
+	{
+	  *type_out = vtype;
+	  gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+						    res_ops[1]);
+
+	  gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+	  gimple_call_set_nothrow (call, /* nothrow_p */ false);
+	  gimple_set_location (call, gimple_location (last_stmt));
+
+	  vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+	  return call;
+	}
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
@@ -6987,6 +7038,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vect_recog_mult_pattern, "mult" },
+  { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
   { vect_recog_gcond_pattern, "gcond" },
   { vect_recog_bool_pattern, "bool" },
author	Pan Li <pan2.li@intel.com>	2024-05-15 10:14:06 +0800
committer	Pan Li <pan2.li@intel.com>	2024-05-16 20:09:30 +0800
commit	d4dee347b3fe1982bab26485ff31cd039c9df010 (patch)
tree	4ab9e18ffa8bf7c750e77dda2706e6a8c43713cb
parent	52b0536710ff3f3ace72ab00ce9ef6c630cd1183 (diff)
download	gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.zip gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.tar.gz gcc-d4dee347b3fe1982bab26485ff31cd039c9df010.tar.bz2