diff options
author | Pan Li <pan2.li@intel.com> | 2024-05-15 10:14:05 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-05-16 20:08:16 +0800 |
commit | 52b0536710ff3f3ace72ab00ce9ef6c630cd1183 (patch) | |
tree | 0e33ea915143143205a0c52ebd4c5e89b1726b7f | |
parent | da73261ce7731be7f2b164f1db796878cdc23365 (diff) | |
download | gcc-52b0536710ff3f3ace72ab00ce9ef6c630cd1183.zip gcc-52b0536710ff3f3ace72ab00ce9ef6c630cd1183.tar.gz gcc-52b0536710ff3f3ace72ab00ce9ef6c630cd1183.tar.bz2 |
Internal-fn: Support new IFN SAT_ADD for unsigned scalar int
This patch adds the middle-end representation for saturating addition,
i.e. the result of the add is set to the type's maximum value on overflow.
It matches patterns similar to the one below.
SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))
Take uint8_t as example, we will have:
* SAT_ADD (1, 254) => 255.
* SAT_ADD (1, 255) => 255.
* SAT_ADD (2, 255) => 255.
* SAT_ADD (255, 255) => 255.
Given the example below for the unsigned scalar integer type uint64_t:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}
Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
long unsigned int _1;
_Bool _2;
long unsigned int _3;
long unsigned int _4;
uint64_t _7;
long unsigned int _10;
__complex__ long unsigned int _11;
;; basic block 2, loop depth 0
;; pred: ENTRY
_11 = .ADD_OVERFLOW (x_5(D), y_6(D));
_1 = REALPART_EXPR <_11>;
_10 = IMAGPART_EXPR <_11>;
_2 = _10 != 0;
_3 = (long unsigned int) _2;
_4 = -_3;
_7 = _1 | _4;
return _7;
;; succ: EXIT
}
After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
uint64_t _7;
;; basic block 2, loop depth 0
;; pred: ENTRY
_7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
return _7;
;; succ: EXIT
}
The tests below passed for this patch:
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.
PR target/51492
PR target/112600
gcc/ChangeLog:
* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
to the return true switch case(s).
* internal-fn.def (SAT_ADD): Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match(es).
* optabs.def (OPTAB_NL): Remove fixed-point limitation for
us/ssadd.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
extern func decl generated in match.pd match.
(match_saturation_arith): New func impl to match the saturation arith.
(math_opts_dom_walker::after_dom_children): Try match saturation
arith when IOR expr.
Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r-- | gcc/internal-fn.cc | 1 | ||||
-rw-r--r-- | gcc/internal-fn.def | 2 | ||||
-rw-r--r-- | gcc/match.pd | 51 | ||||
-rw-r--r-- | gcc/optabs.def | 4 | ||||
-rw-r--r-- | gcc/tree-ssa-math-opts.cc | 32 |
5 files changed, 88 insertions, 2 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 0a7053c..73045ca 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn) case IFN_UBSAN_CHECK_MUL: case IFN_ADD_OVERFLOW: case IFN_MUL_OVERFLOW: + case IFN_SAT_ADD: case IFN_VEC_WIDEN_PLUS: case IFN_VEC_WIDEN_PLUS_LO: case IFN_VEC_WIDEN_PLUS_HI: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 848bb9d..25badbb 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, smulhrs, umulhrs, binary) +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary) + DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary) DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary) DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary) diff --git a/gcc/match.pd b/gcc/match.pd index 07e743a..0f9c34f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3043,6 +3043,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || POINTER_TYPE_P (itype)) && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype)))))) +/* Unsigned Saturation Add */ +(match (usadd_left_part_1 @0 @1) + (plus:c @0 @1) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1))))) + +(match (usadd_left_part_2 @0 @1) + (realpart (IFN_ADD_OVERFLOW:c @0 @1)) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1))))) + +(match (usadd_right_part_1 @0 @1) + (negate (convert (lt (plus:c @0 @1) @0))) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1))))) + +(match (usadd_right_part_1 @0 @1) + (negate (convert (gt @0 (plus:c @0 @1)))) + (if (INTEGRAL_TYPE_P (type) + && 
TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1))))) + +(match (usadd_right_part_2 @0 @1) + (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop))) + (if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@0)) + && types_match (type, TREE_TYPE (@1))))) + +/* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2 + because the sub part of left_part_2 cannot work with right_part_1. + For example, left_part_2 pattern focus one .ADD_OVERFLOW but the + right_part_1 has nothing to do with .ADD_OVERFLOW. */ + +/* Unsigned saturation add, case 1 (branchless): + SAT_U_ADD = (X + Y) | - ((X + Y) < X) or + SAT_U_ADD = (X + Y) | - (X > (X + Y)). */ +(match (unsigned_integer_sat_add @0 @1) + (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1))) + +/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW). */ +(match (unsigned_integer_sat_add @0 @1) + (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1))) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . 
*/ (for eqne (eq ne) diff --git a/gcc/optabs.def b/gcc/optabs.def index ad14f93..3f2cb46 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3") OPTAB_NX(add_optab, "add$Q$a3") OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc) OPTAB_VX(addv_optab, "add$F$a3") -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc) -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc) +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixed_libfunc) +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fixed_libfunc) OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libfunc) OPTAB_NX(sub_optab, "sub$F$a3") OPTAB_NX(sub_optab, "sub$Q$a3") diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index e8c804f..62da1c5 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4086,6 +4086,36 @@ arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt, return 0; } +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); + +/* + * Try to match saturation arith pattern(s). + * 1. 
SAT_ADD (unsigned) + * _7 = _4 + _6; + * _8 = _4 > _7; + * _9 = (long unsigned int) _8; + * _10 = -_9; + * _12 = _7 | _10; + * => + * _12 = .SAT_ADD (_4, _6); */ +static void +match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt) +{ + gcall *call = NULL; + + tree ops[2]; + tree lhs = gimple_assign_lhs (stmt); + + if (gimple_unsigned_integer_sat_add (lhs, ops, NULL) + && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs), + OPTIMIZE_FOR_BOTH)) + { + call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1]); + gimple_call_set_lhs (call, lhs); + gsi_replace (gsi, call, true); + } +} + /* Recognize for unsigned x x = y - z; if (x > y) @@ -6048,6 +6078,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; case BIT_IOR_EXPR: + match_saturation_arith (&gsi, as_a<gassign *> (stmt)); + /* fall-through */ case BIT_XOR_EXPR: match_uaddc_usubc (&gsi, stmt, code); break; |