aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Pan Li <pan2.li@intel.com> 2024-09-21 09:28:39 +0800
committer: Pan Li <pan2.li@intel.com> 2024-09-26 09:00:53 +0800
commit: 342221f35336c7620144948536bc690ef4e8d552 (patch)
tree: bcf34daaf1a96a6b01ed88c241940c06b8c8dacd /gcc
parent: 9d76276bca6d7bb93167a1a7ab48678f76a3b083 (diff)
download: gcc-342221f35336c7620144948536bc690ef4e8d552.zip
          gcc-342221f35336c7620144948536bc690ef4e8d552.tar.gz
          gcc-342221f35336c7620144948536bc690ef4e8d552.tar.bz2
Match: Support form 3 for vector signed integer .SAT_ADD
This patch adds support for form 3 of the vector signed integer .SAT_ADD,
as shown in the example below:

Form 3:
  #define DEF_VEC_SAT_S_ADD_FMT_3(T, UT, MIN, MAX)                     \
  void __attribute__((noinline))                                       \
  vec_sat_s_add_##T##_fmt_3 (T *out, T *op_1, T *op_2, unsigned limit) \
  {                                                                    \
    unsigned i;                                                        \
    for (i = 0; i < limit; i++)                                        \
      {                                                                \
        T x = op_1[i];                                                 \
        T y = op_2[i];                                                 \
        T sum;                                                         \
        bool overflow = __builtin_add_overflow (x, y, &sum);           \
        out[i] = overflow ? x < 0 ? MIN : MAX : sum;                   \
      }                                                                \
  }

DEF_VEC_SAT_S_ADD_FMT_3(int8_t, uint8_t, INT8_MIN, INT8_MAX)

Before this patch:
  40 │   # ivtmp.7_34 = PHI <0(3), ivtmp.7_30(7)>
  41 │   _26 = op_1_12(D) + ivtmp.7_34;
  42 │   x_29 = MEM[(int8_t *)_26];
  43 │   _1 = op_2_14(D) + ivtmp.7_34;
  44 │   y_24 = MEM[(int8_t *)_1];
  45 │   _9 = .ADD_OVERFLOW (y_24, x_29);
  46 │   _7 = IMAGPART_EXPR <_9>;
  47 │   if (_7 != 0)
  48 │     goto <bb 6>; [50.00%]
  49 │   else
  50 │     goto <bb 5>; [50.00%]
  51 │ ;;    succ:       6
  52 │ ;;                5
  53 │
  54 │ ;;   basic block 5, loop depth 1
  55 │ ;;    pred:       4
  56 │   _42 = REALPART_EXPR <_9>;
  57 │   _2 = out_17(D) + ivtmp.7_34;
  58 │   MEM[(int8_t *)_2] = _42;
  59 │   ivtmp.7_27 = ivtmp.7_34 + 1;
  60 │   if (_13 != ivtmp.7_27)
  61 │     goto <bb 7>; [89.00%]
  62 │   else
  63 │     goto <bb 8>; [11.00%]
  64 │ ;;    succ:       7
  65 │ ;;                8
  66 │
  67 │ ;;   basic block 6, loop depth 1
  68 │ ;;    pred:       4
  69 │   _38 = x_29 < 0;
  70 │   _39 = (signed char) _38;
  71 │   _40 = -_39;
  72 │   _41 = _40 ^ 127;
  73 │   _33 = out_17(D) + ivtmp.7_34;
  74 │   MEM[(int8_t *)_33] = _41;
  75 │   ivtmp.7_25 = ivtmp.7_34 + 1;
  76 │   if (_13 != ivtmp.7_25)

After this patch:
  77 │   _94 = .SELECT_VL (ivtmp_92, POLY_INT_CST [16, 16]);
  78 │   vect_x_13.9_81 = .MASK_LEN_LOAD (vectp_op_1.7_79, 8B, { -1, ... }, _94, 0);
  79 │   vect_y_15.12_85 = .MASK_LEN_LOAD (vectp_op_2.10_83, 8B, { -1, ... }, _94, 0);
  80 │   vect_patt_49.13_86 = .SAT_ADD (vect_x_13.9_81, vect_y_15.12_85);
  81 │   .MASK_LEN_STORE (vectp_out.14_88, 8B, { -1, ... }, _94, 0, vect_patt_49.13_86);
  82 │   vectp_op_1.7_80 = vectp_op_1.7_79 + _94;
  83 │   vectp_op_2.10_84 = vectp_op_2.10_83 + _94;
  84 │   vectp_out.14_89 = vectp_out.14_88 + _94;
  85 │   ivtmp_93 = ivtmp_92 - _94;

The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

gcc/ChangeLog:

	* match.pd: Add optional nop_convert for signed SAT_ADD case 4.

Signed-off-by: Pan Li <pan2.li@intel.com>
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/match.pd | 4
1 files changed, 3 insertions, 1 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index cae3c5f..ca48c47 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3246,7 +3246,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
SAT_S_ADD = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : sum; */
(match (signed_integer_sat_add @0 @1)
(cond^ (ne (imagpart (IFN_ADD_OVERFLOW:c@2 @0 @1)) integer_zerop)
- (bit_xor:c (negate (convert (lt @0 integer_zerop))) max_value)
+ (bit_xor:c (nop_convert?
+ (negate (nop_convert? (convert (lt @0 integer_zerop)))))
+ max_value)
(realpart @2))
(if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
&& types_match (type, @0, @1))))