aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pan Li <pan2.li@intel.com> 2024-06-12 14:28:09 +0800
committer: Pan Li <pan2.li@intel.com> 2024-06-14 22:05:49 +0800
commit: 869af0255b648727fbd45fd3da4225069cbcb86d (patch)
tree: d14e356714be280bdc6698838dfcf732685c8b25
parent: 4b1f486fefb3969f35ff6d49f544eb0ac9f49f1f (diff)
download: gcc-869af0255b648727fbd45fd3da4225069cbcb86d.zip
gcc-869af0255b648727fbd45fd3da4225069cbcb86d.tar.gz
gcc-869af0255b648727fbd45fd3da4225069cbcb86d.tar.bz2
Match: Support more forms for the scalar unsigned .SAT_SUB
After supporting scalar unsigned forms 1 and 2, we would like to
introduce more forms, including both branch and branchless variants.
Forms 3-10 are listed below:

Form 3:
  #define SAT_SUB_U_3(T) \
  T sat_sub_u_3_##T (T x, T y) \
  { \
    return x > y ? x - y : 0; \
  }

Form 4:
  #define SAT_SUB_U_4(T) \
  T sat_sub_u_4_##T (T x, T y) \
  { \
    return x >= y ? x - y : 0; \
  }

Form 5:
  #define SAT_SUB_U_5(T) \
  T sat_sub_u_5_##T (T x, T y) \
  { \
    return x < y ? 0 : x - y; \
  }

Form 6:
  #define SAT_SUB_U_6(T) \
  T sat_sub_u_6_##T (T x, T y) \
  { \
    return x <= y ? 0 : x - y; \
  }

Form 7:
  #define SAT_SUB_U_7(T) \
  T sat_sub_u_7_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return ret & (T)(overflow - 1); \
  }

Form 8:
  #define SAT_SUB_U_8(T) \
  T sat_sub_u_8_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return ret & (T)-(!overflow); \
  }

Form 9:
  #define SAT_SUB_U_9(T) \
  T sat_sub_u_9_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return overflow ? 0 : ret; \
  }

Form 10:
  #define SAT_SUB_U_10(T) \
  T sat_sub_u_10_##T (T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_sub_overflow (x, y, &ret); \
    return !overflow ? ret : 0; \
  }

Take form 10 as an example:

SAT_SUB_U_10(uint8_t);

Before this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
  unsigned char _1;
  unsigned char _2;
  uint8_t _3;
  __complex__ unsigned char _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .SUB_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 == 0)
    goto <bb 3>; [50.00%]
  else
    goto <bb 4>; [50.00%]
;;    succ:       3
;;                4

;;   basic block 3, loop depth 0
;;    pred:       2
  _1 = REALPART_EXPR <_6>;
;;    succ:       4

;;   basic block 4, loop depth 0
;;    pred:       2
;;                3
  # _3 = PHI <0(2), _1(3)>
  return _3;
;;    succ:       EXIT

}

After this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
  uint8_t _3;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
  return _3;
;;    succ:       EXIT

}

The following test suites passed with this patch:
1. The rv64gcv full regression test with newlib.
2. The rv64gcv build with glibc.
3. The x86 bootstrap test.
4. The x86 full regression test.

gcc/ChangeLog:

    * match.pd: Add more match for unsigned sat_sub.
    * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
    func impl to match phi node for .SAT_SUB.
    (math_opts_dom_walker::after_dom_children): Try match .SAT_SUB for
    the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.

Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r-- gcc/match.pd 25
-rw-r--r-- gcc/tree-ssa-math-opts.cc 33
2 files changed, 56 insertions, 2 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 3204cf4..99968d3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3147,14 +3147,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* Unsigned saturation sub, case 1 (branch with gt):
SAT_U_SUB = X > Y ? X - Y : 0 */
(match (unsigned_integer_sat_sub @0 @1)
- (cond (gt @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (gt @0 @1) (minus @0 @1) integer_zerop)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
/* Unsigned saturation sub, case 2 (branch with ge):
SAT_U_SUB = X >= Y ? X - Y : 0. */
(match (unsigned_integer_sat_sub @0 @1)
- (cond (ge @0 @1) (minus @0 @1) integer_zerop)
+ (cond^ (ge @0 @1) (minus @0 @1) integer_zerop)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
@@ -3172,6 +3172,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))
+/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW). */
+(match (unsigned_integer_sat_sub @0 @1)
+ (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+ (plus (imagpart @2) integer_minus_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW). */
+(match (unsigned_integer_sat_sub @0 @1)
+ (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1))
+ (bit_xor (imagpart @2) integer_onep))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
+/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW). */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop)
+ (realpart @2) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @0, @1))))
+
/* x > y && x != XXX_MIN --> x > y
x > y && x == XXX_MIN --> false . */
(for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index c09e900..5708548 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt)
build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]);
}
+/*
+ * Try to match saturation unsigned sub.
+ * <bb 2> [local count: 1073741824]:
+ * if (x_2(D) > y_3(D))
+ * goto <bb 3>; [50.00%]
+ * else
+ * goto <bb 4>; [50.00%]
+ *
+ * <bb 3> [local count: 536870912]:
+ * _4 = x_2(D) - y_3(D);
+ *
+ * <bb 4> [local count: 1073741824]:
+ * # _1 = PHI <0(2), _4(3)>
+ * =>
+ * <bb 4> [local count: 1073741824]:
+ * _1 = .SAT_SUB (x_2(D), y_3(D)); */
+static void
+match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi)
+{
+ if (gimple_phi_num_args (phi) != 2)
+ return;
+
+ tree ops[2];
+ tree phi_result = gimple_phi_result (phi);
+
+ if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL))
+ build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result,
+ ops[0], ops[1]);
+}
+
/* Recognize for unsigned x
x = y - z;
if (x > y)
@@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
{
gimple_stmt_iterator gsi = gsi_after_labels (bb);
match_unsigned_saturation_add (&gsi, psi.phi ());
+ match_unsigned_saturation_sub (&gsi, psi.phi ());
}
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
@@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
continue;
}
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
+ match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
break;
case PLUS_EXPR:
@@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
break;
case COND_EXPR:
+ case BIT_AND_EXPR:
match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt));
break;