diff options
author | Pan Li <pan2.li@intel.com> | 2024-06-12 14:28:09 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-06-14 22:05:49 +0800 |
commit | 869af0255b648727fbd45fd3da4225069cbcb86d (patch) | |
tree | d14e356714be280bdc6698838dfcf732685c8b25 | |
parent | 4b1f486fefb3969f35ff6d49f544eb0ac9f49f1f (diff) | |
download | gcc-869af0255b648727fbd45fd3da4225069cbcb86d.zip gcc-869af0255b648727fbd45fd3da4225069cbcb86d.tar.gz gcc-869af0255b648727fbd45fd3da4225069cbcb86d.tar.bz2 |
Match: Support more forms for the scalar unsigned .SAT_SUB
After supporting the scalar unsigned forms 1 and 2, we would like
to introduce more forms, including both branch and branchless variants.
Forms 3-10 are listed below:
Form 3:
#define SAT_SUB_U_3(T) \
T sat_sub_u_3_##T (T x, T y) \
{ \
return x > y ? x - y : 0; \
}
Form 4:
#define SAT_SUB_U_4(T) \
T sat_sub_u_4_##T (T x, T y) \
{ \
return x >= y ? x - y : 0; \
}
Form 5:
#define SAT_SUB_U_5(T) \
T sat_sub_u_5_##T (T x, T y) \
{ \
return x < y ? 0 : x - y; \
}
Form 6:
#define SAT_SUB_U_6(T) \
T sat_sub_u_6_##T (T x, T y) \
{ \
return x <= y ? 0 : x - y; \
}
Form 7:
#define SAT_SUB_U_7(T) \
T sat_sub_u_7_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return ret & (T)(overflow - 1); \
}
Form 8:
#define SAT_SUB_U_8(T) \
T sat_sub_u_8_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return ret & (T)-(!overflow); \
}
Form 9:
#define SAT_SUB_U_9(T) \
T sat_sub_u_9_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return overflow ? 0 : ret; \
}
Form 10:
#define SAT_SUB_U_10(T) \
T sat_sub_u_10_##T (T x, T y) \
{ \
T ret; \
T overflow = __builtin_sub_overflow (x, y, &ret); \
return !overflow ? ret : 0; \
}
Take form 10 as an example:
SAT_SUB_U_10(uint8_t);
Before this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
unsigned char _1;
unsigned char _2;
uint8_t _3;
__complex__ unsigned char _6;
;; basic block 2, loop depth 0
;; pred: ENTRY
_6 = .SUB_OVERFLOW (x_4(D), y_5(D));
_2 = IMAGPART_EXPR <_6>;
if (_2 == 0)
goto <bb 3>; [50.00%]
else
goto <bb 4>; [50.00%]
;; succ: 3
;; 4
;; basic block 3, loop depth 0
;; pred: 2
_1 = REALPART_EXPR <_6>;
;; succ: 4
;; basic block 4, loop depth 0
;; pred: 2
;; 3
# _3 = PHI <0(2), _1(3)>
return _3;
;; succ: EXIT
}
After this patch:
uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y)
{
uint8_t _3;
;; basic block 2, loop depth 0
;; pred: ENTRY
_3 = .SAT_SUB (x_4(D), y_5(D)); [tail call]
return _3;
;; succ: EXIT
}
The following test suites pass with this patch:
1. The rv64gcv full regression test with newlib.
2. The rv64gcv build with glibc.
3. The x86 bootstrap test.
4. The x86 full regression test.
gcc/ChangeLog:
* match.pd: Add more match for unsigned sat_sub.
* tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new
func impl to match phi node for .SAT_SUB.
(math_opts_dom_walker::after_dom_children): Try match .SAT_SUB
for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR.
Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r-- | gcc/match.pd | 25 | ||||
-rw-r--r-- | gcc/tree-ssa-math-opts.cc | 33 |
2 files changed, 56 insertions, 2 deletions
diff --git a/gcc/match.pd b/gcc/match.pd index 3204cf4..99968d3 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3147,14 +3147,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) - (cond (gt @0 @1) (minus @0 @1) integer_zerop) + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) /* Unsigned saturation sub, case 2 (branch with ge): SAT_U_SUB = X >= Y ? X - Y : 0. */ (match (unsigned_integer_sat_sub @0 @1) - (cond (ge @0 @1) (minus @0 @1) integer_zerop) + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) @@ -3172,6 +3172,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW). */ +(match (unsigned_integer_sat_sub @0 @1) + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1)) + (plus (imagpart @2) integer_minus_onep)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW). */ +(match (unsigned_integer_sat_sub @0 @1) + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1)) + (bit_xor (imagpart @2) integer_onep)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW). */ +(match (unsigned_integer_sat_sub @0 @1) + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) + (realpart @2) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . 
*/ (for eqne (eq ne) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index c09e900..5708548 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); } +/* + * Try to match saturation unsigned sub. + * <bb 2> [local count: 1073741824]: + * if (x_2(D) > y_3(D)) + * goto <bb 3>; [50.00%] + * else + * goto <bb 4>; [50.00%] + * + * <bb 3> [local count: 536870912]: + * _4 = x_2(D) - y_3(D); + * + * <bb 4> [local count: 1073741824]: + * # _1 = PHI <0(2), _4(3)> + * => + * <bb 4> [local count: 1073741824]: + * _1 = .SAT_SUB (x_2(D), y_3(D)); */ +static void +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi) +{ + if (gimple_phi_num_args (phi) != 2) + return; + + tree ops[2]; + tree phi_result = gimple_phi_result (phi); + + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)) + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result, + ops[0], ops[1]); +} + /* Recognize for unsigned x x = y - z; if (x > y) @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) { gimple_stmt_iterator gsi = gsi_after_labels (bb); match_unsigned_saturation_add (&gsi, psi.phi ()); + match_unsigned_saturation_sub (&gsi, psi.phi ()); } for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) continue; } match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p); + match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); break; case PLUS_EXPR: @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; case COND_EXPR: + case BIT_AND_EXPR: match_unsigned_saturation_sub (&gsi, as_a<gassign *> (stmt)); break; |