diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 73 | ||||
-rw-r--r-- | gcc/doc/md.texi | 16 | ||||
-rw-r--r-- | gcc/fold-const-call.cc | 24 | ||||
-rw-r--r-- | gcc/gimple-fold.cc | 29 | ||||
-rw-r--r-- | gcc/gimple-range-fold.cc | 2 | ||||
-rw-r--r-- | gcc/internal-fn.cc | 39 | ||||
-rw-r--r-- | gcc/internal-fn.def | 2 | ||||
-rw-r--r-- | gcc/optabs.def | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-1.c | 59 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-10.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-2.c | 59 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-3.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-4.c | 61 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-5.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-6.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-7.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-8.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr79173-9.c | 31 | ||||
-rw-r--r-- | gcc/tree-ssa-dce.cc | 8 | ||||
-rw-r--r-- | gcc/tree-ssa-math-opts.cc | 500 |
20 files changed, 1118 insertions, 6 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 75eda25..394c767 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8558,6 +8558,67 @@ (minus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)") +(define_expand "uaddc<mode>5" + [(match_operand:SWI48 0 "register_operand") + (match_operand:SWI48 1 "register_operand") + (match_operand:SWI48 2 "register_operand") + (match_operand:SWI48 3 "register_operand") + (match_operand:SWI48 4 "nonmemory_operand")] + "" +{ + rtx cf = gen_rtx_REG (CCCmode, FLAGS_REG), pat, pat2; + if (operands[4] == const0_rtx) + emit_insn (gen_addcarry<mode>_0 (operands[0], operands[2], operands[3])); + else + { + rtx op4 = copy_to_mode_reg (QImode, + convert_to_mode (QImode, operands[4], 1)); + emit_insn (gen_addqi3_cconly_overflow (op4, constm1_rtx)); + pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx); + pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx); + emit_insn (gen_addcarry<mode> (operands[0], operands[2], operands[3], + cf, pat, pat2)); + } + rtx cc = gen_reg_rtx (QImode); + pat = gen_rtx_LTU (QImode, cf, const0_rtx); + emit_insn (gen_rtx_SET (cc, pat)); + emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc)); + DONE; +}) + +(define_expand "usubc<mode>5" + [(match_operand:SWI48 0 "register_operand") + (match_operand:SWI48 1 "register_operand") + (match_operand:SWI48 2 "register_operand") + (match_operand:SWI48 3 "register_operand") + (match_operand:SWI48 4 "nonmemory_operand")] + "" +{ + rtx cf, pat, pat2; + if (operands[4] == const0_rtx) + { + cf = gen_rtx_REG (CCmode, FLAGS_REG); + emit_insn (gen_subborrow<mode>_0 (operands[0], operands[2], + operands[3])); + } + else + { + cf = gen_rtx_REG (CCCmode, FLAGS_REG); + rtx op4 = copy_to_mode_reg (QImode, + convert_to_mode (QImode, operands[4], 1)); + emit_insn (gen_addqi3_cconly_overflow (op4, constm1_rtx)); + pat = gen_rtx_LTU (<DWI>mode, cf, const0_rtx); + pat2 = gen_rtx_LTU (<MODE>mode, cf, const0_rtx); + 
emit_insn (gen_subborrow<mode> (operands[0], operands[2], operands[3], + cf, pat, pat2)); + } + rtx cc = gen_reg_rtx (QImode); + pat = gen_rtx_LTU (QImode, cf, const0_rtx); + emit_insn (gen_rtx_SET (cc, pat)); + emit_insn (gen_zero_extendqi<mode>2 (operands[1], cc)); + DONE; +}) + (define_mode_iterator CC_CCC [CC CCC]) ;; Pre-reload splitter to optimize @@ -8570,7 +8631,8 @@ "ix86_pre_reload_split ()" "#" "&& 1" - [(const_int 0)]) + [(const_int 0)] + "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. (define_insn_and_split "*setccc" @@ -8579,7 +8641,8 @@ "ix86_pre_reload_split ()" "#" "&& 1" - [(const_int 0)]) + [(const_int 0)] + "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. (define_insn_and_split "*setcc_qi_negqi_ccc_1_<mode>" @@ -8588,7 +8651,8 @@ "ix86_pre_reload_split ()" "#" "&& 1" - [(const_int 0)]) + [(const_int 0)] + "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. (define_insn_and_split "*setcc_qi_negqi_ccc_2_<mode>" @@ -8598,7 +8662,8 @@ "ix86_pre_reload_split ()" "#" "&& 1" - [(const_int 0)]) + [(const_int 0)] + "emit_note (NOTE_INSN_DELETED); DONE;") ;; Overflow setting add instructions diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 4490507..a43fd65 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5224,6 +5224,22 @@ is taken only on unsigned overflow. @item @samp{usubv@var{m}4}, @samp{umulv@var{m}4} Similar, for other unsigned arithmetic operations. +@cindex @code{uaddc@var{m}5} instruction pattern +@item @samp{uaddc@var{m}5} +Adds unsigned operands 2, 3 and 4 (where the last operand is guaranteed to +have only values 0 or 1) together, sets operand 0 to the result of the +addition of the 3 operands and sets operand 1 to 1 iff there was +overflow on the unsigned additions, and to 0 otherwise. So, it is +an addition with carry in (operand 4) and carry out (operand 1). +All operands have the same mode. 
+ +@cindex @code{usubc@var{m}5} instruction pattern +@item @samp{usubc@var{m}5} +Similarly to @samp{uaddc@var{m}5}, except subtracts unsigned operands 3 +and 4 from operand 2 instead of adding them. So, it is +a subtraction with carry/borrow in (operand 4) and carry/borrow out +(operand 1). All operands have the same mode. + @cindex @code{addptr@var{m}3} instruction pattern @item @samp{addptr@var{m}3} Like @code{add@var{m}3} but is guaranteed to only be used for address diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc index 00ff4e4..04be3d2 100644 --- a/gcc/fold-const-call.cc +++ b/gcc/fold-const-call.cc @@ -1937,6 +1937,30 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) return NULL_TREE; } + case CFN_UADDC: + case CFN_USUBC: + if (integer_cst_p (arg0) && integer_cst_p (arg1) && integer_cst_p (arg2)) + { + tree itype = TREE_TYPE (type); + bool ovf = false; + tree_code subcode = fn == CFN_UADDC ? PLUS_EXPR : MINUS_EXPR; + tree r = int_const_binop (subcode, fold_convert (itype, arg0), + fold_convert (itype, arg1)); + if (!r) + return NULL_TREE; + if (arith_overflowed_p (subcode, itype, arg0, arg1)) + ovf = true; + tree r2 = int_const_binop (subcode, r, fold_convert (itype, arg2)); + if (!r2 || TREE_CODE (r2) != INTEGER_CST) + return NULL_TREE; + if (arith_overflowed_p (subcode, itype, r, arg2)) + ovf = true; + if (TREE_OVERFLOW (r2)) + r2 = drop_tree_overflow (r2); + return build_complex (type, r2, build_int_cst (itype, ovf)); + } + return NULL_TREE; + default: return fold_const_call_1 (fn, type, arg0, arg1, arg2); } diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index df88ad7..55e8056 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -5585,6 +5585,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) enum tree_code subcode = ERROR_MARK; tree result = NULL_TREE; bool cplx_result = false; + bool uaddc_usubc = false; tree overflow = NULL_TREE; switch (gimple_call_internal_fn (stmt)) { @@ -5658,6 
+5659,16 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) subcode = MULT_EXPR; cplx_result = true; break; + case IFN_UADDC: + subcode = PLUS_EXPR; + cplx_result = true; + uaddc_usubc = true; + break; + case IFN_USUBC: + subcode = MINUS_EXPR; + cplx_result = true; + uaddc_usubc = true; + break; case IFN_MASK_LOAD: changed |= gimple_fold_partial_load (gsi, stmt, true); break; @@ -5677,6 +5688,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) { tree arg0 = gimple_call_arg (stmt, 0); tree arg1 = gimple_call_arg (stmt, 1); + tree arg2 = NULL_TREE; tree type = TREE_TYPE (arg0); if (cplx_result) { @@ -5685,9 +5697,26 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) type = NULL_TREE; else type = TREE_TYPE (TREE_TYPE (lhs)); + if (uaddc_usubc) + arg2 = gimple_call_arg (stmt, 2); } if (type == NULL_TREE) ; + else if (uaddc_usubc) + { + if (!integer_zerop (arg2)) + ; + /* x = y + 0 + 0; x = y - 0 - 0; */ + else if (integer_zerop (arg1)) + result = arg0; + /* x = 0 + y + 0; */ + else if (subcode != MINUS_EXPR && integer_zerop (arg0)) + result = arg1; + /* x = y - y - 0; */ + else if (subcode == MINUS_EXPR + && operand_equal_p (arg0, arg1, 0)) + result = integer_zero_node; + } /* x = y + 0; x = y - 0; x = y * 0; */ else if (integer_zerop (arg1)) result = subcode == MULT_EXPR ? 
integer_zero_node : arg0; diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 173d9f3..efcc3d8 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -489,6 +489,8 @@ adjust_imagpart_expr (vrange &res, const gimple *stmt) case IFN_ADD_OVERFLOW: case IFN_SUB_OVERFLOW: case IFN_MUL_OVERFLOW: + case IFN_UADDC: + case IFN_USUBC: case IFN_ATOMIC_COMPARE_EXCHANGE: { int_range<2> r; diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index da9b944..208bdf4 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -2776,6 +2776,44 @@ expand_MUL_OVERFLOW (internal_fn, gcall *stmt) expand_arith_overflow (MULT_EXPR, stmt); } +/* Expand UADDC STMT. */ + +static void +expand_UADDC (internal_fn ifn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + tree arg1 = gimple_call_arg (stmt, 0); + tree arg2 = gimple_call_arg (stmt, 1); + tree arg3 = gimple_call_arg (stmt, 2); + tree type = TREE_TYPE (arg1); + machine_mode mode = TYPE_MODE (type); + insn_code icode = optab_handler (ifn == IFN_UADDC + ? uaddc5_optab : usubc5_optab, mode); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = expand_normal (arg3); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx re = gen_reg_rtx (mode); + rtx im = gen_reg_rtx (mode); + class expand_operand ops[5]; + create_output_operand (&ops[0], re, mode); + create_output_operand (&ops[1], im, mode); + create_input_operand (&ops[2], op1, mode); + create_input_operand (&ops[3], op2, mode); + create_input_operand (&ops[4], op3, mode); + expand_insn (icode, 5, ops); + write_complex_part (target, re, false, false); + write_complex_part (target, im, true, false); +} + +/* Expand USUBC STMT. */ + +static void +expand_USUBC (internal_fn ifn, gcall *stmt) +{ + expand_UADDC (ifn, stmt); +} + /* This should get folded in tree-vectorizer.cc. 
*/ static void @@ -4049,6 +4087,7 @@ commutative_ternary_fn_p (internal_fn fn) case IFN_FMS: case IFN_FNMA: case IFN_FNMS: + case IFN_UADDC: return true; default: diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 45a3c98..9da5f31 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -419,6 +419,8 @@ DEF_INTERNAL_FN (ASAN_POISON_USE, ECF_LEAF | ECF_NOTHROW | ECF_NOVOPS, NULL) DEF_INTERNAL_FN (ADD_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (SUB_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (MUL_OVERFLOW, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (UADDC, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (USUBC, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL) DEF_INTERNAL_FN (VEC_CONVERT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) diff --git a/gcc/optabs.def b/gcc/optabs.def index 0a96923..22b31be 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -260,6 +260,8 @@ OPTAB_D (uaddv4_optab, "uaddv$I$a4") OPTAB_D (usubv4_optab, "usubv$I$a4") OPTAB_D (umulv4_optab, "umulv$I$a4") OPTAB_D (negv3_optab, "negv$I$a3") +OPTAB_D (uaddc5_optab, "uaddc$I$a5") +OPTAB_D (usubc5_optab, "usubc$I$a5") OPTAB_D (addptr3_optab, "addptr$a3") OPTAB_D (spaceship_optab, "spaceship$a3") diff --git a/gcc/testsuite/gcc.target/i386/pr79173-1.c b/gcc/testsuite/gcc.target/i386/pr79173-1.c new file mode 100644 index 0000000..af01830 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-1.c @@ -0,0 +1,59 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { 
target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r; + unsigned long c1 = __builtin_add_overflow (x, y, &r); + unsigned long c2 = __builtin_add_overflow (r, carry_in, &r); + *carry_out = c1 + c2; + return r; +} + +static unsigned long +usubc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r; + unsigned long c1 = __builtin_sub_overflow (x, y, &r); + unsigned long c2 = __builtin_sub_overflow (r, carry_in, &r); + *carry_out = c1 + c2; + return r; +} + +void +foo (unsigned long *p, unsigned long *q) +{ + 
unsigned long c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); +} + +void +bar (unsigned long *p, unsigned long *q) +{ + unsigned long c; + p[0] = usubc (p[0], q[0], 0, &c); + p[1] = usubc (p[1], q[1], c, &c); + p[2] = usubc (p[2], q[2], c, &c); + p[3] = usubc (p[3], q[3], c, &c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-10.c b/gcc/testsuite/gcc.target/i386/pr79173-10.c new file mode 100644 index 0000000..fb43dd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-10.c @@ -0,0 +1,31 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%\[^\n\r]*\\\)" 1 } } */ + +#include <x86intrin.h> + +unsigned int +foo (unsigned int *p, unsigned int *q) +{ + unsigned char c = _addcarry_u32 (0, p[0], q[0], &p[0]); + c = _addcarry_u32 (c, p[1], q[1], &p[1]); + c = _addcarry_u32 (c, p[2], q[2], &p[2]); + return _addcarry_u32 (c, p[3], q[3], &p[3]); +} + +unsigned int +bar (unsigned int *p, unsigned int *q) +{ + unsigned char c = _subborrow_u32 (0, p[0], q[0], &p[0]); + c = _subborrow_u32 (c, p[1], q[1], &p[1]); + c = _subborrow_u32 (c, p[2], q[2], &p[2]); + return _subborrow_u32 (c, p[3], q[3], &p[3]); +} diff --git 
a/gcc/testsuite/gcc.target/i386/pr79173-2.c b/gcc/testsuite/gcc.target/i386/pr79173-2.c new file mode 100644 index 0000000..f10eeb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-2.c @@ -0,0 +1,59 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, 
unsigned long y, _Bool carry_in, _Bool *carry_out) +{ + unsigned long r; + _Bool c1 = __builtin_add_overflow (x, y, &r); + _Bool c2 = __builtin_add_overflow (r, carry_in, &r); + *carry_out = c1 | c2; + return r; +} + +static unsigned long +usubc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out) +{ + unsigned long r; + _Bool c1 = __builtin_sub_overflow (x, y, &r); + _Bool c2 = __builtin_sub_overflow (r, carry_in, &r); + *carry_out = c1 | c2; + return r; +} + +void +foo (unsigned long *p, unsigned long *q) +{ + _Bool c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); +} + +void +bar (unsigned long *p, unsigned long *q) +{ + _Bool c; + p[0] = usubc (p[0], q[0], 0, &c); + p[1] = usubc (p[1], q[1], c, &c); + p[2] = usubc (p[2], q[2], c, &c); + p[3] = usubc (p[3], q[3], c, &c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-3.c b/gcc/testsuite/gcc.target/i386/pr79173-3.c new file mode 100644 index 0000000..2cdf4e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-3.c @@ -0,0 +1,61 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { 
target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r; + unsigned long c1 = __builtin_add_overflow (x, y, &r); + unsigned long c2 = __builtin_add_overflow (r, carry_in, &r); + *carry_out = c1 + c2; + return r; +} + +static unsigned long +usubc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r; + unsigned long c1 = __builtin_sub_overflow (x, y, &r); + unsigned long c2 = __builtin_sub_overflow (r, carry_in, &r); + *carry_out = c1 + c2; + return r; +} + +unsigned long +foo (unsigned long *p, unsigned long *q) +{ + unsigned long c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); + return c; +} + +unsigned long +bar (unsigned long *p, unsigned long *q) +{ + unsigned long c; + p[0] = usubc (p[0], q[0], 0, &c); + p[1] = usubc (p[1], q[1], c, &c); + p[2] = usubc (p[2], q[2], c, &c); + p[3] = usubc (p[3], q[3], c, &c); + return c; +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-4.c 
b/gcc/testsuite/gcc.target/i386/pr79173-4.c new file mode 100644 index 0000000..2152489 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-4.c @@ -0,0 +1,61 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, unsigned long y, _Bool carry_in, _Bool 
*carry_out) +{ + unsigned long r; + _Bool c1 = __builtin_add_overflow (x, y, &r); + _Bool c2 = __builtin_add_overflow (r, carry_in, &r); + *carry_out = c1 ^ c2; + return r; +} + +static unsigned long +usubc (unsigned long x, unsigned long y, _Bool carry_in, _Bool *carry_out) +{ + unsigned long r; + _Bool c1 = __builtin_sub_overflow (x, y, &r); + _Bool c2 = __builtin_sub_overflow (r, carry_in, &r); + *carry_out = c1 ^ c2; + return r; +} + +_Bool +foo (unsigned long *p, unsigned long *q) +{ + _Bool c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); + return c; +} + +_Bool +bar (unsigned long *p, unsigned long *q) +{ + _Bool c; + p[0] = usubc (p[0], q[0], 0, &c); + p[1] = usubc (p[1], q[1], c, &c); + p[2] = usubc (p[2], q[2], c, &c); + p[3] = usubc (p[3], q[3], c, &c); + return c; +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-5.c b/gcc/testsuite/gcc.target/i386/pr79173-5.c new file mode 100644 index 0000000..eb3ddd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-5.c @@ -0,0 +1,32 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 
12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r = x + y; + unsigned long c1 = r < x; + r += carry_in; + unsigned long c2 = r < carry_in; + *carry_out = c1 + c2; + return r; +} + +void +foo (unsigned long *p, unsigned long *q) +{ + unsigned long c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-6.c b/gcc/testsuite/gcc.target/i386/pr79173-6.c new file mode 100644 index 0000000..d91ba5a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-6.c @@ -0,0 +1,33 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 { target lp64 } } } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%e\[^\n\r]*\\\)" 1 { target ia32 } } } */ + +static unsigned long +uaddc (unsigned long x, unsigned long y, unsigned long carry_in, unsigned long *carry_out) +{ + unsigned long r = x + y; + unsigned long c1 = r < x; + r += carry_in; + unsigned long c2 = r < carry_in; + *carry_out = c1 + c2; + return r; +} + +unsigned long +foo (unsigned long *p, unsigned long 
*q) +{ + unsigned long c; + p[0] = uaddc (p[0], q[0], 0, &c); + p[1] = uaddc (p[1], q[1], c, &c); + p[2] = uaddc (p[2], q[2], c, &c); + p[3] = uaddc (p[3], q[3], c, &c); + return c; +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-7.c b/gcc/testsuite/gcc.target/i386/pr79173-7.c new file mode 100644 index 0000000..48eb101 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-7.c @@ -0,0 +1,31 @@ +/* PR middle-end/79173 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 } } */ + +#include <x86intrin.h> + +void +foo (unsigned long long *p, unsigned long long *q) +{ + unsigned char c = _addcarry_u64 (0, p[0], q[0], &p[0]); + c = _addcarry_u64 (c, p[1], q[1], &p[1]); + c = _addcarry_u64 (c, p[2], q[2], &p[2]); + _addcarry_u64 (c, p[3], q[3], &p[3]); +} + +void +bar (unsigned long long *p, unsigned long long *q) +{ + unsigned char c = _subborrow_u64 (0, p[0], q[0], &p[0]); + c = _subborrow_u64 (c, p[1], q[1], &p[1]); + c = _subborrow_u64 (c, p[2], q[2], &p[2]); + _subborrow_u64 (c, p[3], q[3], &p[3]); +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-8.c b/gcc/testsuite/gcc.target/i386/pr79173-8.c new file mode 100644 index 0000000..c61c016 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-8.c @@ -0,0 +1,31 @@ +/* PR middle-end/79173 */ +/* { dg-do compile } */ 
+/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addl\t%e\[^\n\r]*, \\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 4\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 8\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcl\t%e\[^\n\r]*, 12\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "subl\t%e\[^\n\r]*, \\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 4\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 8\\\(%\[^\n\r]*\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbl\t%e\[^\n\r]*, 12\\\(%\[^\n\r]*\\\)" 1 } } */ + +#include <x86intrin.h> + +void +foo (unsigned int *p, unsigned int *q) +{ + unsigned char c = _addcarry_u32 (0, p[0], q[0], &p[0]); + c = _addcarry_u32 (c, p[1], q[1], &p[1]); + c = _addcarry_u32 (c, p[2], q[2], &p[2]); + _addcarry_u32 (c, p[3], q[3], &p[3]); +} + +void +bar (unsigned int *p, unsigned int *q) +{ + unsigned char c = _subborrow_u32 (0, p[0], q[0], &p[0]); + c = _subborrow_u32 (c, p[1], q[1], &p[1]); + c = _subborrow_u32 (c, p[2], q[2], &p[2]); + _subborrow_u32 (c, p[3], q[3], &p[3]); +} diff --git a/gcc/testsuite/gcc.target/i386/pr79173-9.c b/gcc/testsuite/gcc.target/i386/pr79173-9.c new file mode 100644 index 0000000..3847629 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79173-9.c @@ -0,0 +1,31 @@ +/* PR middle-end/79173 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -fno-stack-protector -masm=att" } */ +/* { dg-final { scan-assembler-times "addq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "adcq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times 
"subq\t%r\[^\n\r]*, \\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 8\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 16\\\(%rdi\\\)" 1 } } */ +/* { dg-final { scan-assembler-times "sbbq\t%r\[^\n\r]*, 24\\\(%rdi\\\)" 1 } } */ + +#include <x86intrin.h> + +unsigned long long +foo (unsigned long long *p, unsigned long long *q) +{ + unsigned char c = _addcarry_u64 (0, p[0], q[0], &p[0]); + c = _addcarry_u64 (c, p[1], q[1], &p[1]); + c = _addcarry_u64 (c, p[2], q[2], &p[2]); + return _addcarry_u64 (c, p[3], q[3], &p[3]); +} + +unsigned long long +bar (unsigned long long *p, unsigned long long *q) +{ + unsigned char c = _subborrow_u64 (0, p[0], q[0], &p[0]); + c = _subborrow_u64 (c, p[1], q[1], &p[1]); + c = _subborrow_u64 (c, p[2], q[2], &p[2]); + return _subborrow_u64 (c, p[3], q[3], &p[3]); +} diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index d77e541..2949957 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -1481,6 +1481,14 @@ eliminate_unnecessary_stmts (bool aggressive) case IFN_MUL_OVERFLOW: maybe_optimize_arith_overflow (&gsi, MULT_EXPR); break; + case IFN_UADDC: + if (integer_zerop (gimple_call_arg (stmt, 2))) + maybe_optimize_arith_overflow (&gsi, PLUS_EXPR); + break; + case IFN_USUBC: + if (integer_zerop (gimple_call_arg (stmt, 2))) + maybe_optimize_arith_overflow (&gsi, MINUS_EXPR); + break; default: break; } diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 9c9ca57..b2764d4 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4441,6 +4441,496 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt, return false; } +/* Helper of match_uaddc_usubc. Look through an integral cast + which should preserve [0, 1] range value (unless source has + 1-bit signed type) and the cast has single use. 
*/ + +static gimple * +uaddc_cast (gimple *g) +{ + if (!gimple_assign_cast_p (g)) + return g; + tree op = gimple_assign_rhs1 (g); + if (TREE_CODE (op) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (op)) + && (TYPE_PRECISION (TREE_TYPE (op)) > 1 + || TYPE_UNSIGNED (TREE_TYPE (op))) + && has_single_use (gimple_assign_lhs (g))) + return SSA_NAME_DEF_STMT (op); + return g; +} + +/* Helper of match_uaddc_usubc. Look through a NE_EXPR + comparison with 0 which also preserves [0, 1] value range. */ + +static gimple * +uaddc_ne0 (gimple *g) +{ + if (is_gimple_assign (g) + && gimple_assign_rhs_code (g) == NE_EXPR + && integer_zerop (gimple_assign_rhs2 (g)) + && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME + && has_single_use (gimple_assign_lhs (g))) + return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g)); + return g; +} + +/* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME + operand. */ + +static bool +uaddc_is_cplxpart (gimple *g, tree_code part) +{ + return (is_gimple_assign (g) + && gimple_assign_rhs_code (g) == part + && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME); +} + +/* Try to match e.g. + _29 = .ADD_OVERFLOW (_3, _4); + _30 = REALPART_EXPR <_29>; + _31 = IMAGPART_EXPR <_29>; + _32 = .ADD_OVERFLOW (_30, _38); + _33 = REALPART_EXPR <_32>; + _34 = IMAGPART_EXPR <_32>; + _35 = _31 + _34; + as + _36 = .UADDC (_3, _4, _38); + _33 = REALPART_EXPR <_36>; + _35 = IMAGPART_EXPR <_36>; + or + _22 = .SUB_OVERFLOW (_6, _5); + _23 = REALPART_EXPR <_22>; + _24 = IMAGPART_EXPR <_22>; + _25 = .SUB_OVERFLOW (_23, _37); + _26 = REALPART_EXPR <_25>; + _27 = IMAGPART_EXPR <_25>; + _28 = _24 | _27; + as + _29 = .USUBC (_6, _5, _37); + _26 = REALPART_EXPR <_29>; + _28 = IMAGPART_EXPR <_29>; + provided _38 or _37 above have [0, 1] range + and _3, _4 and _30 or _6, _5 and _23 are unsigned + integral types with the same precision. 
Whether + or | or ^ is + used on the IMAGPART_EXPR results doesn't matter, with one of + added or subtracted operands in [0, 1] range at most one + .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */ + +static bool +match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code) +{ + tree rhs[4]; + rhs[0] = gimple_assign_rhs1 (stmt); + rhs[1] = gimple_assign_rhs2 (stmt); + rhs[2] = NULL_TREE; + rhs[3] = NULL_TREE; + tree type = TREE_TYPE (rhs[0]); + if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type)) + return false; + + if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR) + { + /* If overflow flag is ignored on the MSB limb, we can end up with + the most significant limb handled as r = op1 + op2 + ovf1 + ovf2; + or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions + thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize + the limb below the MSB, but also create another .UADDC/.USUBC call + for the last limb. + + First look through assignments with the same rhs code as CODE, + with the exception that subtraction of a constant is canonicalized + into addition of its negation. rhs[0] will be minuend for + subtractions and one of addends for addition, all other assigned + rhs[i] operands will be subtrahends or other addends. */ + while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3]) + { + gimple *g = SSA_NAME_DEF_STMT (rhs[0]); + if (has_single_use (rhs[0]) + && is_gimple_assign (g) + && (gimple_assign_rhs_code (g) == code + || (code == MINUS_EXPR + && gimple_assign_rhs_code (g) == PLUS_EXPR + && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST))) + { + tree r2 = gimple_assign_rhs2 (g); + if (gimple_assign_rhs_code (g) != code) + { + r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2); + if (!r2) + break; + } + rhs[0] = gimple_assign_rhs1 (g); + tree &r = rhs[2] ? 
rhs[3] : rhs[2]; + r = r2; + } + else + break; + } + while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3]) + { + gimple *g = SSA_NAME_DEF_STMT (rhs[1]); + if (has_single_use (rhs[1]) + && is_gimple_assign (g) + && gimple_assign_rhs_code (g) == PLUS_EXPR) + { + rhs[1] = gimple_assign_rhs1 (g); + if (rhs[2]) + rhs[3] = gimple_assign_rhs2 (g); + else + rhs[2] = gimple_assign_rhs2 (g); + } + else + break; + } + /* If there are just 3 addends or one minuend and two subtrahends, + check for UADDC or USUBC being pattern recognized earlier. + Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part + got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3) + etc. */ + if (rhs[2] && !rhs[3]) + { + for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i) + if (TREE_CODE (rhs[i]) == SSA_NAME) + { + gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); + im = uaddc_ne0 (im); + if (uaddc_is_cplxpart (im, IMAGPART_EXPR)) + { + /* We found one of the 3 addends or 2 subtrahends to be + __imag__ of something, verify it is .UADDC/.USUBC. */ + tree rhs1 = gimple_assign_rhs1 (im); + gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0)); + if (gimple_call_internal_p (ovf, code == PLUS_EXPR + ? IFN_UADDC : IFN_USUBC) + && (optab_handler (code == PLUS_EXPR + ? uaddc5_optab : usubc5_optab, + TYPE_MODE (type)) + != CODE_FOR_nothing)) + { + /* And in that case build another .UADDC/.USUBC + call for the most significant limb addition. + Overflow bit is ignored here. */ + if (i != 2) + std::swap (rhs[i], rhs[2]); + gimple *g + = gimple_build_call_internal (code == PLUS_EXPR + ? 
IFN_UADDC + : IFN_USUBC, + 3, rhs[0], rhs[1], + rhs[2]); + tree nlhs = make_ssa_name (build_complex_type (type)); + gimple_call_set_lhs (g, nlhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + tree ilhs = gimple_assign_lhs (stmt); + g = gimple_build_assign (ilhs, REALPART_EXPR, + build1 (REALPART_EXPR, + TREE_TYPE (ilhs), + nlhs)); + gsi_replace (gsi, g, true); + return true; + } + } + } + return false; + } + if (code == MINUS_EXPR && !rhs[2]) + return false; + if (code == MINUS_EXPR) + /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs. + So, for MINUS_EXPR swap the single added rhs operand (others are + subtracted) to rhs[3]. */ + std::swap (rhs[0], rhs[3]); + } + /* Walk from both operands of STMT (for +/- even sometimes from + all the 4 addends or 3 subtrahends), see through casts and != 0 + statements which would preserve [0, 1] range of values and + check which is initialized from __imag__. */ + gimple *im1 = NULL, *im2 = NULL; + for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++) + if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME) + { + gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i])); + im = uaddc_ne0 (im); + if (uaddc_is_cplxpart (im, IMAGPART_EXPR)) + { + if (im1 == NULL) + { + im1 = im; + if (i != 0) + std::swap (rhs[0], rhs[i]); + } + else + { + im2 = im; + if (i != 1) + std::swap (rhs[1], rhs[i]); + break; + } + } + } + /* If we don't find at least two, punt. */ + if (!im2) + return false; + /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results, + either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have + uaddc5/usubc5 named pattern for the corresponding mode. 
*/ + gimple *ovf1 + = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0)); + gimple *ovf2 + = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0)); + internal_fn ifn; + if (!is_gimple_call (ovf1) + || !gimple_call_internal_p (ovf1) + || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW + && ifn != IFN_SUB_OVERFLOW) + || !gimple_call_internal_p (ovf2, ifn) + || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab, + TYPE_MODE (type)) == CODE_FOR_nothing + || (rhs[2] + && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab, + TYPE_MODE (type)) == CODE_FOR_nothing)) + return false; + tree arg1, arg2, arg3 = NULL_TREE; + gimple *re1 = NULL, *re2 = NULL; + /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments + should be initialized from __real__ of the other of the two calls. + Though, for .SUB_OVERFLOW, it has to be the first argument, not the + second one. */ + for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i) + for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL)) + { + tree arg = gimple_call_arg (ovf, i); + if (TREE_CODE (arg) != SSA_NAME) + continue; + re1 = SSA_NAME_DEF_STMT (arg); + if (uaddc_is_cplxpart (re1, REALPART_EXPR) + && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0)) + == (ovf == ovf1 ? ovf2 : ovf1))) + { + if (ovf == ovf1) + { + /* Make sure ovf2 is the .*_OVERFLOW call with argument + initialized from __real__ of ovf1. */ + std::swap (rhs[0], rhs[1]); + std::swap (im1, im2); + std::swap (ovf1, ovf2); + } + arg3 = gimple_call_arg (ovf, 1 - i); + i = -1; + break; + } + } + if (!arg3) + return false; + arg1 = gimple_call_arg (ovf1, 0); + arg2 = gimple_call_arg (ovf1, 1); + if (!types_compatible_p (type, TREE_TYPE (arg1))) + return false; + int kind[2] = { 0, 0 }; + /* At least one of arg2 and arg3 should have type compatible + with arg1/rhs[0], and the other one should have value in [0, 1] + range. 
If both are in [0, 1] range and type compatible with + arg1/rhs[0], try harder to find after looking through casts, + != 0 comparisons which one is initialized to __imag__ of + .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */ + for (int i = 0; i < 2; ++i) + { + tree arg = i == 0 ? arg2 : arg3; + if (types_compatible_p (type, TREE_TYPE (arg))) + kind[i] = 1; + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg)) + || (TYPE_PRECISION (TREE_TYPE (arg)) == 1 + && !TYPE_UNSIGNED (TREE_TYPE (arg)))) + continue; + if (tree_zero_one_valued_p (arg)) + kind[i] |= 2; + if (TREE_CODE (arg) == SSA_NAME) + { + gimple *g = SSA_NAME_DEF_STMT (arg); + if (gimple_assign_cast_p (g)) + { + tree op = gimple_assign_rhs1 (g); + if (TREE_CODE (op) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (op))) + g = SSA_NAME_DEF_STMT (op); + } + g = uaddc_ne0 (g); + if (!uaddc_is_cplxpart (g, IMAGPART_EXPR)) + continue; + g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0)); + if (!is_gimple_call (g) || !gimple_call_internal_p (g)) + continue; + switch (gimple_call_internal_fn (g)) + { + case IFN_ADD_OVERFLOW: + case IFN_SUB_OVERFLOW: + case IFN_UADDC: + case IFN_USUBC: + break; + default: + continue; + } + kind[i] |= 4; + } + } + /* Make arg2 the one with compatible type and arg3 the one + with [0, 1] range. If both is true for both operands, + prefer as arg3 result of __imag__ of some ifn. */ + if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1])) + { + std::swap (arg2, arg3); + std::swap (kind[0], kind[1]); + } + if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0) + return false; + if (!has_single_use (gimple_assign_lhs (im1)) + || !has_single_use (gimple_assign_lhs (im2)) + || !has_single_use (gimple_assign_lhs (re1)) + || num_imm_uses (gimple_call_lhs (ovf1)) != 2) + return false; + /* Check that ovf2's result is used in __real__ and set re2 + to that statement. 
*/ + use_operand_p use_p; + imm_use_iterator iter; + tree lhs = gimple_call_lhs (ovf2); + FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (use_stmt == im2) + continue; + if (re2) + return false; + if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR)) + return false; + re2 = use_stmt; + } + /* Build .UADDC/.USUBC call which will be placed before the stmt. */ + gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2); + gimple *g; + if ((kind[1] & 1) == 0) + { + if (TREE_CODE (arg3) == INTEGER_CST) + arg3 = fold_convert (type, arg3); + else + { + g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + arg3 = gimple_assign_lhs (g); + } + } + g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW + ? IFN_UADDC : IFN_USUBC, + 3, arg1, arg2, arg3); + tree nlhs = make_ssa_name (TREE_TYPE (lhs)); + gimple_call_set_lhs (g, nlhs); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + /* In the case where stmt is | or ^ of two overflow flags + or addition of those, replace stmt with __imag__ of the above + added call. In case of arg1 + arg2 + (ovf1 + ovf2) or + arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */ + tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt); + g = gimple_build_assign (ilhs, IMAGPART_EXPR, + build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs)); + if (rhs[2]) + gsi_insert_before (gsi, g, GSI_SAME_STMT); + else + gsi_replace (gsi, g, true); + /* Remove some statements which can't be kept in the IL because they + use SSA_NAME whose setter is going to be removed too. 
*/ + tree rhs1 = rhs[1]; + for (int i = 0; i < 2; i++) + if (rhs1 == gimple_assign_lhs (im2)) + break; + else + { + g = SSA_NAME_DEF_STMT (rhs1); + rhs1 = gimple_assign_rhs1 (g); + gsi2 = gsi_for_stmt (g); + gsi_remove (&gsi2, true); + } + gcc_checking_assert (rhs1 == gimple_assign_lhs (im2)); + gsi2 = gsi_for_stmt (im2); + gsi_remove (&gsi2, true); + /* Replace the re2 statement with __real__ of the newly added + .UADDC/.USUBC call. */ + gsi2 = gsi_for_stmt (re2); + tree rlhs = gimple_assign_lhs (re2); + g = gimple_build_assign (rlhs, REALPART_EXPR, + build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs)); + gsi_replace (&gsi2, g, true); + if (rhs[2]) + { + /* If this is the arg1 + arg2 + (ovf1 + ovf2) or + arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb, + replace stmt with __real__ of another .UADDC/.USUBC call which + handles the most significant limb. Overflow flag from this is + ignored. */ + g = gimple_build_call_internal (code == PLUS_EXPR + ? IFN_UADDC : IFN_USUBC, + 3, rhs[3], rhs[2], ilhs); + nlhs = make_ssa_name (TREE_TYPE (lhs)); + gimple_call_set_lhs (g, nlhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + ilhs = gimple_assign_lhs (stmt); + g = gimple_build_assign (ilhs, REALPART_EXPR, + build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs)); + gsi_replace (gsi, g, true); + } + if (TREE_CODE (arg3) == SSA_NAME) + { + /* When pattern recognizing the second least significant limb + above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb), + check if the [0, 1] range argument (i.e. carry in) isn't the + result of another .{ADD,SUB}_OVERFLOW call (one handling the + least significant limb). Again look through casts and != 0. 
*/ + gimple *im3 = SSA_NAME_DEF_STMT (arg3); + for (int i = 0; i < 2; ++i) + { + gimple *im4 = uaddc_cast (im3); + if (im4 == im3) + break; + else + im3 = im4; + } + im3 = uaddc_ne0 (im3); + if (uaddc_is_cplxpart (im3, IMAGPART_EXPR)) + { + gimple *ovf3 + = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0)); + if (gimple_call_internal_p (ovf3, ifn)) + { + lhs = gimple_call_lhs (ovf3); + arg1 = gimple_call_arg (ovf3, 0); + arg2 = gimple_call_arg (ovf3, 1); + if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs))) + && types_compatible_p (type, TREE_TYPE (arg1)) + && types_compatible_p (type, TREE_TYPE (arg2))) + { + /* And if it is initialized from result of __imag__ + of .{ADD,SUB}_OVERFLOW call, replace that + call with .U{ADD,SUB}C call with the same arguments, + just 0 added as third argument. This isn't strictly + necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0) + produce the same result, but may result in better + generated code on some targets where the backend can + better prepare in how the result will be used. */ + g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW + ? IFN_UADDC : IFN_USUBC, + 3, arg1, arg2, + build_zero_cst (type)); + gimple_call_set_lhs (g, lhs); + gsi2 = gsi_for_stmt (ovf3); + gsi_replace (&gsi2, g, true); + } + } + } + } + return true; +} + /* Return true if target has support for divmod. 
*/ static bool @@ -5068,8 +5558,9 @@ math_opts_dom_walker::after_dom_children (basic_block bb) case PLUS_EXPR: case MINUS_EXPR: - if (!convert_plusminus_to_widen (&gsi, stmt, code)) - match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p); + if (!convert_plusminus_to_widen (&gsi, stmt, code) + && !match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p)) + match_uaddc_usubc (&gsi, stmt, code); break; case BIT_NOT_EXPR: @@ -5085,6 +5576,11 @@ math_opts_dom_walker::after_dom_children (basic_block bb) convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi); break; + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + match_uaddc_usubc (&gsi, stmt, code); + break; + default:; } } |