diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/internal-fn.c | 77 | ||||
-rw-r--r-- | gcc/optabs.c | 132 | ||||
-rw-r--r-- | gcc/optabs.h | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-3.c | 54 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-4.c | 57 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-5.c | 56 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr97459-6.c | 62 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr97282.c | 4 |
10 files changed, 414 insertions, 34 deletions
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 9c4fd1c..41223ff 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3230,27 +3230,68 @@ expand_DIVMOD (internal_fn, gcall *call_stmt) the division and modulo and if it emits any library calls or any {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or divmod libcall. */ - struct separate_ops ops; - ops.code = TRUNC_DIV_EXPR; - ops.type = type; - ops.op0 = make_tree (ops.type, op0); - ops.op1 = arg1; - ops.op2 = NULL_TREE; - ops.location = gimple_location (call_stmt); - start_sequence (); - quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); - if (contains_call_div_mod (get_insns ())) - quotient = NULL_RTX; - else + scalar_int_mode int_mode; + if (remainder == NULL_RTX + && optimize + && CONST_INT_P (op1) + && !pow2p_hwi (INTVAL (op1)) + && is_int_mode (TYPE_MODE (type), &int_mode) + && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD + && optab_handler (and_optab, word_mode) != CODE_FOR_nothing + && optab_handler (add_optab, word_mode) != CODE_FOR_nothing + && optimize_insn_for_speed_p ()) + { + rtx_insn *last = get_last_insn (); + remainder = NULL_RTX; + quotient = expand_doubleword_divmod (int_mode, op0, op1, &remainder, + TYPE_UNSIGNED (type)); + if (quotient != NULL_RTX) + { + if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing) + { + rtx_insn *move = emit_move_insn (quotient, quotient); + set_dst_reg_note (move, REG_EQUAL, + gen_rtx_fmt_ee (TYPE_UNSIGNED (type) + ? UDIV : DIV, int_mode, + copy_rtx (op0), op1), + quotient); + move = emit_move_insn (remainder, remainder); + set_dst_reg_note (move, REG_EQUAL, + gen_rtx_fmt_ee (TYPE_UNSIGNED (type) + ? UMOD : MOD, int_mode, + copy_rtx (op0), op1), + quotient); + } + } + else + delete_insns_since (last); + } + + if (remainder == NULL_RTX) { - ops.code = TRUNC_MOD_EXPR; - remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); + struct separate_ops ops; + ops.code = TRUNC_DIV_EXPR; + ops.type = type; + ops.op0 = make_tree (ops.type, op0); + ops.op1 = arg1; + ops.op2 = NULL_TREE; + ops.location = gimple_location (call_stmt); + start_sequence (); + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); if (contains_call_div_mod (get_insns ())) - remainder = NULL_RTX; + quotient = NULL_RTX; + else + { + ops.code = TRUNC_MOD_EXPR; + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, + EXPAND_NORMAL); + if (contains_call_div_mod (get_insns ())) + remainder = NULL_RTX; + } + if (remainder) + insns = get_insns (); + end_sequence (); } - if (remainder) - insns = get_insns (); - end_sequence (); } if (remainder) diff --git a/gcc/optabs.c b/gcc/optabs.c index 3b116d3..41daa48 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1118,6 +1118,99 @@ expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp) } return NULL_RTX; } + +/* Similarly to the above function, but compute both quotient and remainder. + Quotient can be computed from the remainder as: + rem = op0 % op1; // Handled using expand_doubleword_mod + quot = (op0 - rem) * inv; // inv is multiplicative inverse of op1 modulo + // 2 * BITS_PER_WORD + + We can also handle cases where op1 is a multiple of power of two constant + and constant handled by expand_doubleword_mod. + op11 = 1 << __builtin_ctz (op1); + op12 = op1 / op11; + rem1 = op0 % op12; // Handled using expand_doubleword_mod + quot1 = (op0 - rem1) * inv; // inv is multiplicative inverse of op12 modulo + // 2 * BITS_PER_WORD + rem = (quot1 % op11) * op12 + rem1; + quot = quot1 / op11; */ + +rtx +expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem, + bool unsignedp) +{ + *rem = NULL_RTX; + + /* Negative dividend should have been optimized into positive, + similarly modulo by 1 and modulo by power of two is optimized + differently too. */ + if (INTVAL (op1) <= 1 || pow2p_hwi (INTVAL (op1))) + return NULL_RTX; + + rtx op11 = const1_rtx; + rtx op12 = op1; + if ((INTVAL (op1) & 1) == 0) + { + int bit = ctz_hwi (INTVAL (op1)); + op11 = GEN_INT (HOST_WIDE_INT_1 << bit); + op12 = GEN_INT (INTVAL (op1) >> bit); + } + + rtx rem1 = expand_doubleword_mod (mode, op0, op12, unsignedp); + if (rem1 == NULL_RTX) + return NULL_RTX; + + int prec = 2 * BITS_PER_WORD; + wide_int a = wide_int::from (INTVAL (op12), prec + 1, UNSIGNED); + wide_int b = wi::shifted_mask (prec, 1, false, prec + 1); + wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED); + rtx inv = immed_wide_int_const (m, mode); + + rtx_insn *last = get_last_insn (); + rtx quot1 = expand_simple_binop (mode, MINUS, op0, rem1, + NULL_RTX, unsignedp, OPTAB_DIRECT); + if (quot1 == NULL_RTX) + return NULL_RTX; + + quot1 = expand_simple_binop (mode, MULT, quot1, inv, + NULL_RTX, unsignedp, OPTAB_DIRECT); + if (quot1 == NULL_RTX) + return NULL_RTX; + + if (op11 != const1_rtx) + { + rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11, + NULL_RTX, unsignedp); + if (rem2 == NULL_RTX) + return NULL_RTX; + + rem2 = expand_simple_binop (mode, MULT, rem2, op12, NULL_RTX, + unsignedp, OPTAB_DIRECT); + if (rem2 == NULL_RTX) + return NULL_RTX; + + rem2 = expand_simple_binop (mode, PLUS, rem2, rem1, NULL_RTX, + unsignedp, OPTAB_DIRECT); + if (rem2 == NULL_RTX) + return NULL_RTX; + + rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11, + NULL_RTX, unsignedp); + if (quot2 == NULL_RTX) + return NULL_RTX; + + rem1 = rem2; + quot1 = quot2; + } + + /* Punt if we need any library calls. */ + for (; last; last = NEXT_INSN (last)) + if (CALL_P (last)) + return NULL_RTX; + + *rem = rem1; + return quot1; +} /* Wrapper around expand_binop which takes an rtx code to specify the operation to perform, not an optab pointer. All other @@ -1999,7 +2092,10 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, } /* Attempt to synthetize double word modulo by constant divisor. */ - if ((binoptab == umod_optab || binoptab == smod_optab) + if ((binoptab == umod_optab + || binoptab == smod_optab + || binoptab == udiv_optab + || binoptab == sdiv_optab) && optimize && CONST_INT_P (op1) && is_int_mode (mode, &int_mode) @@ -2008,21 +2104,33 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, && optab_handler (add_optab, word_mode) != CODE_FOR_nothing && optimize_insn_for_speed_p ()) { - rtx remainder = expand_doubleword_mod (int_mode, op0, op1, - binoptab == umod_optab); - if (remainder != NULL_RTX) + rtx res = NULL_RTX; + if ((binoptab == umod_optab || binoptab == smod_optab) + && (INTVAL (op1) & 1) == 0) + res = expand_doubleword_mod (int_mode, op0, op1, + binoptab == umod_optab); + else + { + rtx quot = expand_doubleword_divmod (int_mode, op0, op1, &res, + binoptab == umod_optab + || binoptab == udiv_optab); + if (quot == NULL_RTX) + res = NULL_RTX; + else if (binoptab == udiv_optab || binoptab == sdiv_optab) + res = quot; + } + if (res != NULL_RTX) { if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing) { - rtx_insn *move = emit_move_insn (target ? target : remainder, - remainder); - set_dst_reg_note (move, - REG_EQUAL, - gen_rtx_fmt_ee (UMOD, int_mode, - copy_rtx (op0), op1), - target ? target : remainder); + rtx_insn *move = emit_move_insn (target ? target : res, + res); + set_dst_reg_note (move, REG_EQUAL, + gen_rtx_fmt_ee (optab_to_code (binoptab), + int_mode, copy_rtx (op0), op1), + target ? target : res); } - return remainder; + return res; } else delete_insns_since (last); diff --git a/gcc/optabs.h b/gcc/optabs.h index 84aaa7a..87fed90 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -183,6 +183,8 @@ extern bool force_expand_binop (machine_mode, optab, rtx, rtx, rtx, int, enum optab_methods); extern rtx expand_vector_broadcast (machine_mode, rtx); +extern rtx expand_doubleword_divmod (machine_mode, rtx, rtx, rtx *, bool); + /* Generate code for a simple binary or unary operation. "Simple" in this case means "can be unambiguously described by a (mode, code) pair and mapped to a single optab." */ diff --git a/gcc/testsuite/gcc.dg/pr97459-1.c b/gcc/testsuite/gcc.dg/pr97459-1.c index 3dcbb1d..96c7ab6 100644 --- a/gcc/testsuite/gcc.dg/pr97459-1.c +++ b/gcc/testsuite/gcc.dg/pr97459-1.c @@ -24,7 +24,7 @@ T __attribute__((noipa)) foo (T x, T n) { return x % n; } #define C3(n) C2(n##0) C2(n##4) C2(n##9) #define C4(n) C3(n##0) C3(n##3) C3(n##7) #endif -#define TESTS C4(1) +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) TESTS diff --git a/gcc/testsuite/gcc.dg/pr97459-2.c b/gcc/testsuite/gcc.dg/pr97459-2.c index 83e00cb..0e2bfbd 100644 --- a/gcc/testsuite/gcc.dg/pr97459-2.c +++ b/gcc/testsuite/gcc.dg/pr97459-2.c @@ -26,7 +26,7 @@ T __attribute__((noipa)) foo (T x, T n) { return x % n; } #define C3(n) C2(n##0) C2(n##4) C2(n##9) #define C4(n) C3(n##0) C3(n##3) C3(n##7) #endif -#define TESTS C4(1) +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) TESTS diff --git a/gcc/testsuite/gcc.dg/pr97459-3.c b/gcc/testsuite/gcc.dg/pr97459-3.c new file mode 100644 index 0000000..7fbb7ee --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97459-3.c @@ -0,0 +1,54 @@ +/* PR rtl-optimization/97459 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */ + +#ifdef __SIZEOF_INT128__ +typedef __uint128_t T; +#else +typedef unsigned long long T; +#endif + +T __attribute__((noipa)) foo (T x, T n) { return x / n; } +#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); } + +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9) +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \ + C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9) +#ifdef EXPENSIVE +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \ + C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9) +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \ + C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9) +#else +#define C3(n) C2(n##0) C2(n##4) C2(n##9) +#define C4(n) C3(n##0) C3(n##3) C3(n##7) +#endif +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) + +TESTS + +struct S { T x; T (*foo) (T); }; + +#undef C +#define C(n) { n - 10000, foo##n }, + +struct S tests[] = { +TESTS + { 0, 0 } +}; + +int +main () +{ + int i, j, k; + for (k = 0; tests[k].x; k++) + for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++) + for (j = -5; j <= 5; j++) + { + T x = ((T) 1 << i) + j; + if (foo (x, tests[k].x) != tests[k].foo (x)) + __builtin_abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr97459-4.c b/gcc/testsuite/gcc.dg/pr97459-4.c new file mode 100644 index 0000000..33e49a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97459-4.c @@ -0,0 +1,57 @@ +/* PR rtl-optimization/97459 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */ + +#ifdef __SIZEOF_INT128__ +typedef __int128_t T; +typedef __uint128_t U; +#else +typedef long long T; +typedef unsigned long long U; +#endif + +T __attribute__((noipa)) foo (T x, T n) { return x / n; } +#define C(n) T __attribute__((noipa)) foo##n (T x) { return x / (n - 10000); } + +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9) +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \ + C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9) +#ifdef EXPENSIVE +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \ + C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9) +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \ + C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9) +#else +#define C3(n) C2(n##0) C2(n##4) C2(n##9) +#define C4(n) C3(n##0) C3(n##3) C3(n##7) +#endif +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) + +TESTS + +struct S { T x; T (*foo) (T); }; + +#undef C +#define C(n) { n - 10000, foo##n }, + +struct S tests[] = { +TESTS + { 0, 0 } +}; + +int +main () +{ + int i, j, k; + for (k = 0; tests[k].x; k++) + for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++) + for (j = -5; j <= 5; j++) + { + U x = ((U) 1 << i) + j; + if (foo ((T) x, tests[k].x) != tests[k].foo ((T) x) + || foo ((T) -x, tests[k].x) != tests[k].foo ((T) -x)) + __builtin_abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr97459-5.c b/gcc/testsuite/gcc.dg/pr97459-5.c new file mode 100644 index 0000000..f658a5a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97459-5.c @@ -0,0 +1,56 @@ +/* PR rtl-optimization/97459 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */ + +#ifdef __SIZEOF_INT128__ +typedef __uint128_t T; +#else +typedef unsigned long long T; +#endif + +T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; } +#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 10000); return x / (n - 10000); } + +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9) +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \ + C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9) +#ifdef EXPENSIVE +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \ + C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9) +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \ + C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9) +#else +#define C3(n) C2(n##0) C2(n##4) C2(n##9) +#define C4(n) C3(n##0) C3(n##3) C3(n##7) +#endif +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) + +TESTS + +struct S { T x; T (*foo) (T, T *); }; + +#undef C +#define C(n) { n - 10000, foo##n }, + +struct S tests[] = { +TESTS + { 0, 0 } +}; + +int +main () +{ + int i, j, k; + for (k = 0; tests[k].x; k++) + for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++) + for (j = -5; j <= 5; j++) + { + T x = ((T) 1 << i) + j; + T r1, r2; + if (foo (x, tests[k].x, &r1) != tests[k].foo (x, &r2) + || r1 != r2) + __builtin_abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr97459-6.c b/gcc/testsuite/gcc.dg/pr97459-6.c new file mode 100644 index 0000000..d4602be --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97459-6.c @@ -0,0 +1,62 @@ +/* PR rtl-optimization/97459 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-DEXPENSIVE" { target run_expensive_tests } } */ + +#ifdef __SIZEOF_INT128__ +typedef __int128_t T; +typedef __uint128_t U; +#else +typedef long long T; +typedef unsigned long long U; +#endif + +T __attribute__((noipa)) foo (T x, T n, T *r) { *r = x % n; return x / n; } +#define C(n) T __attribute__((noipa)) foo##n (T x, T *r) { *r = x % (n - 10000); return x / (n - 10000); } + +#define C1(n) C(n##1) C(n##3) C(n##5) C(n##7) C(n##9) +#define C2(n) C1(n##0) C1(n##1) C1(n##2) C1(n##3) C1(n##4) \ + C1(n##5) C1(n##6) C1(n##7) C1(n##8) C1(n##9) +#ifdef EXPENSIVE +#define C3(n) C2(n##0) C2(n##1) C2(n##2) C2(n##3) C2(n##4) \ + C2(n##5) C2(n##6) C2(n##7) C2(n##8) C2(n##9) +#define C4(n) C3(n##0) C3(n##1) C3(n##2) C3(n##3) C3(n##4) \ + C3(n##5) C3(n##6) C3(n##7) C3(n##8) C3(n##9) +#else +#define C3(n) C2(n##0) C2(n##4) C2(n##9) +#define C4(n) C3(n##0) C3(n##3) C3(n##7) +#endif +#define TESTS C4(1) C1(10010) C1(10012) C1(16144) + +TESTS + +struct S { T x; T (*foo) (T, T *); }; + +#undef C +#define C(n) { n - 10000, foo##n }, + +struct S tests[] = { +TESTS + { 0, 0 } +}; + +int +main () +{ + int i, j, k; + for (k = 0; tests[k].x; k++) + for (i = 0; i < sizeof (T) * __CHAR_BIT__; i++) + for (j = -5; j <= 5; j++) + { + U x = ((U) 1 << i) + j; + T r1 = 0, r2 = 0; + if (foo ((T) x, tests[k].x, &r1) != tests[k].foo ((T) x, &r2) + || r1 != r2) + __builtin_abort (); + r1 = 0; r2 = 0; + if (foo ((T) -x, tests[k].x, &r1) != tests[k].foo ((T) -x, &r2) + || r1 != r2) + __builtin_abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr97282.c b/gcc/testsuite/gcc.target/i386/pr97282.c index 6fb10c8..94ce50b 100644 --- a/gcc/testsuite/gcc.target/i386/pr97282.c +++ b/gcc/testsuite/gcc.target/i386/pr97282.c @@ -18,8 +18,8 @@ foo (T x) unsigned long ret = 0; while (x > 0) { - ret = ret + x % 10; - x = x / 10; + ret = ret + x % 123456; + x = x / 123456; } return ret; } |