diff options
author | Roger Sayle <roger@eyesopen.com> | 2005-03-14 18:24:15 +0000 |
---|---|---|
committer | Roger Sayle <sayle@gcc.gnu.org> | 2005-03-14 18:24:15 +0000 |
commit | f927760badd161592135b6ba58b5f418b19dcec5 (patch) | |
tree | bf30c17ba5e1564a31e749406185e5fe9e1e8ae8 /gcc | |
parent | a6ee1a15322bd2acadcd49c9e2aa315f32803de5 (diff) | |
download | gcc-f927760badd161592135b6ba58b5f418b19dcec5.zip gcc-f927760badd161592135b6ba58b5f418b19dcec5.tar.gz gcc-f927760badd161592135b6ba58b5f418b19dcec5.tar.bz2 |
re PR rtl-optimization/17236 (inefficient code for long long multiply on x86)
PR rtl-optimization/17236
* optabs.c (expand_doubleword_mult): New helper function split out
from expand_binop. Permute the order in which instructions are
emitted to minimize the number of simultaneously live registers.
(expand_binop): Call expand_doubleword_mult to synthesize a double
word multiplication.
From-SVN: r96441
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/optabs.c | 362 |
2 files changed, 198 insertions, 173 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 53f5d5c..865c247 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2005-03-14 Roger Sayle <roger@eyesopen.com> + + PR rtl-optimization/17236 + * optabs.c (expand_doubleword_mult): New helper function split out + from expand_binop. Permute the order in which instructions are + emitted to minimize the number of simultaneously live registers. + (expand_binop): Call expand_doubleword_mult to synthesize a double + word multiplication. + 2005-03-14 Kazu Hirata <kazu@cs.umass.edu> * basic-block.h: Update the prototypes of cached_make_edge and diff --git a/gcc/optabs.c b/gcc/optabs.c index 9525620..f0c336e 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -756,6 +756,168 @@ expand_doubleword_shift (enum machine_mode op1_mode, optab binoptab, return true; } +/* Subroutine of expand_binop. Perform a double word multiplication of + operands OP0 and OP1 both of mode MODE, which is exactly twice as wide + as the target's word_mode. This function return NULL_RTX if anything + goes wrong, in which case it may have already emitted instructions + which need to be deleted. + + If we want to multiply two two-word values and have normal and widening + multiplies of single-word values, we can do this with three smaller + multiplications. Note that we do not make a REG_NO_CONFLICT block here + because we are not operating on one word at a time. + + The multiplication proceeds as follows: + _______________________ + [__op0_high_|__op0_low__] + _______________________ + * [__op1_high_|__op1_low__] + _______________________________________________ + _______________________ + (1) [__op0_low__*__op1_low__] + _______________________ + (2a) [__op0_low__*__op1_high_] + _______________________ + (2b) [__op0_high_*__op1_low__] + _______________________ + (3) [__op0_high_*__op1_high_] + + + This gives a 4-word result. Since we are only interested in the + lower 2 words, partial result (3) and the upper words of (2a) and + (2b) don't need to be calculated. Hence (2a) and (2b) can be + calculated using non-widening multiplication. + + (1), however, needs to be calculated with an unsigned widening + multiplication. If this operation is not directly supported we + try using a signed widening multiplication and adjust the result. + This adjustment works as follows: + + If both operands are positive then no adjustment is needed. + + If the operands have different signs, for example op0_low < 0 and + op1_low >= 0, the instruction treats the most significant bit of + op0_low as a sign bit instead of a bit with significance + 2**(BITS_PER_WORD-1), i.e. the instruction multiplies op1_low + with 2**BITS_PER_WORD - op0_low, and two's complements the + result. Conclusion: We need to add op1_low * 2**BITS_PER_WORD to + the result. + + Similarly, if both operands are negative, we need to add + (op0_low + op1_low) * 2**BITS_PER_WORD. + + We use a trick to adjust quickly. We logically shift op0_low right + (op1_low) BITS_PER_WORD-1 steps to get 0 or 1, and add this to + op0_high (op1_high) before it is used to calculate 2b (2a). If no + logical shift exists, we do an arithmetic right shift and subtract + the 0 or -1. */ + +static rtx +expand_doubleword_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target, + bool umulp, enum optab_methods methods) +{ + int low = (WORDS_BIG_ENDIAN ? 1 : 0); + int high = (WORDS_BIG_ENDIAN ? 0 : 1); + rtx wordm1 = umulp ? NULL_RTX : GEN_INT (BITS_PER_WORD - 1); + rtx product, adjust, product_high, temp; + + rtx op0_high = operand_subword_force (op0, high, mode); + rtx op0_low = operand_subword_force (op0, low, mode); + rtx op1_high = operand_subword_force (op1, high, mode); + rtx op1_low = operand_subword_force (op1, low, mode); + + /* If we're using an unsigned multiply to directly compute the product + of the low-order words of the operands and perform any required + adjustments of the operands, we begin by trying two more multiplications + and then computing the appropriate sum. + + We have checked above that the required addition is provided. + Full-word addition will normally always succeed, especially if + it is provided at all, so we don't worry about its failure. The + multiplication may well fail, however, so we do handle that. */ + + if (!umulp) + { + /* ??? This could be done with emit_store_flag where available. */ + temp = expand_binop (word_mode, lshr_optab, op0_low, wordm1, + NULL_RTX, 1, methods); + if (temp) + op0_high = expand_binop (word_mode, add_optab, op0_high, temp, + op0_high, 0, OPTAB_DIRECT); + else + { + temp = expand_binop (word_mode, ashr_optab, op0_low, wordm1, + NULL_RTX, 0, methods); + if (!temp) + return NULL_RTX; + op0_high = expand_binop (word_mode, sub_optab, op0_high, temp, + op0_high, 0, OPTAB_DIRECT); + } + + if (!op0_high) + return NULL_RTX; + } + + adjust = expand_binop (word_mode, smul_optab, op0_high, op1_low, + NULL_RTX, 0, OPTAB_DIRECT); + if (!adjust) + return NULL_RTX; + + /* OP0_HIGH should now be dead. */ + + if (!umulp) + { + /* ??? This could be done with emit_store_flag where available. */ + temp = expand_binop (word_mode, lshr_optab, op1_low, wordm1, + NULL_RTX, 1, methods); + if (temp) + op1_high = expand_binop (word_mode, add_optab, op1_high, temp, + op1_high, 0, OPTAB_DIRECT); + else + { + temp = expand_binop (word_mode, ashr_optab, op1_low, wordm1, + NULL_RTX, 0, methods); + if (!temp) + return NULL_RTX; + op1_high = expand_binop (word_mode, sub_optab, op1_high, temp, + op1_high, 0, OPTAB_DIRECT); + } + + if (!op1_high) + return NULL_RTX; + } + + temp = expand_binop (word_mode, smul_optab, op1_high, op0_low, + NULL_RTX, 0, OPTAB_DIRECT); + if (!temp) + return NULL_RTX; + + /* OP1_HIGH should now be dead. */ + + adjust = expand_binop (word_mode, add_optab, adjust, temp, + adjust, 0, OPTAB_DIRECT); + + if (target && !REG_P (target)) + target = NULL_RTX; + + if (umulp) + product = expand_binop (mode, umul_widen_optab, op0_low, op1_low, + target, 1, OPTAB_DIRECT); + else + product = expand_binop (mode, smul_widen_optab, op0_low, op1_low, + target, 1, OPTAB_DIRECT); + + if (!product) + return NULL_RTX; + + product_high = operand_subword (product, high, 1, mode); + adjust = expand_binop (word_mode, add_optab, product_high, adjust, + REG_P (product_high) ? product_high : adjust, + 0, OPTAB_DIRECT); + emit_move_insn (product_high, adjust); + return product; +} + /* Wrapper around expand_binop which takes an rtx code to specify the operation to perform, not an optab pointer. All other arguments are the same. */ @@ -1399,197 +1561,51 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1, delete_insns_since (last); } - /* If we want to multiply two two-word values and have normal and widening - multiplies of single-word values, we can do this with three smaller - multiplications. Note that we do not make a REG_NO_CONFLICT block here - because we are not operating on one word at a time. - - The multiplication proceeds as follows: - _______________________ - [__op0_high_|__op0_low__] - _______________________ - * [__op1_high_|__op1_low__] - _______________________________________________ - _______________________ - (1) [__op0_low__*__op1_low__] - _______________________ - (2a) [__op0_low__*__op1_high_] - _______________________ - (2b) [__op0_high_*__op1_low__] - _______________________ - (3) [__op0_high_*__op1_high_] - - - This gives a 4-word result. Since we are only interested in the - lower 2 words, partial result (3) and the upper words of (2a) and - (2b) don't need to be calculated. Hence (2a) and (2b) can be - calculated using non-widening multiplication. - - (1), however, needs to be calculated with an unsigned widening - multiplication. If this operation is not directly supported we - try using a signed widening multiplication and adjust the result. - This adjustment works as follows: - - If both operands are positive then no adjustment is needed. - - If the operands have different signs, for example op0_low < 0 and - op1_low >= 0, the instruction treats the most significant bit of - op0_low as a sign bit instead of a bit with significance - 2**(BITS_PER_WORD-1), i.e. the instruction multiplies op1_low - with 2**BITS_PER_WORD - op0_low, and two's complements the - result. Conclusion: We need to add op1_low * 2**BITS_PER_WORD to - the result. - - Similarly, if both operands are negative, we need to add - (op0_low + op1_low) * 2**BITS_PER_WORD. - - We use a trick to adjust quickly. We logically shift op0_low right - (op1_low) BITS_PER_WORD-1 steps to get 0 or 1, and add this to - op0_high (op1_high) before it is used to calculate 2b (2a). If no - logical shift exists, we do an arithmetic right shift and subtract - the 0 or -1. */ + /* Attempt to synthesize double word multiplies using a sequence of word + mode multiplications. We first attempt to generate a sequence using a + more efficient unsigned widening multiply, and if that fails we then + try using a signed widening multiply. */ if (binoptab == smul_optab && class == MODE_INT && GET_MODE_SIZE (mode) == 2 * UNITS_PER_WORD && smul_optab->handlers[(int) word_mode].insn_code != CODE_FOR_nothing - && add_optab->handlers[(int) word_mode].insn_code != CODE_FOR_nothing - && ((umul_widen_optab->handlers[(int) mode].insn_code - != CODE_FOR_nothing) - || (smul_widen_optab->handlers[(int) mode].insn_code - != CODE_FOR_nothing))) + && add_optab->handlers[(int) word_mode].insn_code != CODE_FOR_nothing) { - int low = (WORDS_BIG_ENDIAN ? 1 : 0); - int high = (WORDS_BIG_ENDIAN ? 0 : 1); - rtx op0_high = operand_subword_force (op0, high, mode); - rtx op0_low = operand_subword_force (op0, low, mode); - rtx op1_high = operand_subword_force (op1, high, mode); - rtx op1_low = operand_subword_force (op1, low, mode); - rtx product = 0; - rtx op0_xhigh = NULL_RTX; - rtx op1_xhigh = NULL_RTX; - - /* If the target is the same as one of the inputs, don't use it. This - prevents problems with the REG_EQUAL note. */ - if (target == op0 || target == op1 - || (target != 0 && !REG_P (target))) - target = 0; - - /* Multiply the two lower words to get a double-word product. - If unsigned widening multiplication is available, use that; - otherwise use the signed form and compensate. */ - - if (umul_widen_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) - { - product = expand_binop (mode, umul_widen_optab, op0_low, op1_low, - target, 1, OPTAB_DIRECT); + rtx product = NULL_RTX; - /* If we didn't succeed, delete everything we did so far. */ - if (product == 0) + if (umul_widen_optab->handlers[(int) mode].insn_code + != CODE_FOR_nothing) + { + product = expand_doubleword_mult (mode, op0, op1, target, + true, methods); + if (!product) delete_insns_since (last); - else - op0_xhigh = op0_high, op1_xhigh = op1_high; } - if (product == 0 + if (product == NULL_RTX && smul_widen_optab->handlers[(int) mode].insn_code - != CODE_FOR_nothing) + != CODE_FOR_nothing) { - rtx wordm1 = GEN_INT (BITS_PER_WORD - 1); - product = expand_binop (mode, smul_widen_optab, op0_low, op1_low, - target, 1, OPTAB_DIRECT); - op0_xhigh = expand_binop (word_mode, lshr_optab, op0_low, wordm1, - NULL_RTX, 1, next_methods); - if (op0_xhigh) - op0_xhigh = expand_binop (word_mode, add_optab, op0_high, - op0_xhigh, op0_xhigh, 0, next_methods); - else - { - op0_xhigh = expand_binop (word_mode, ashr_optab, op0_low, wordm1, - NULL_RTX, 0, next_methods); - if (op0_xhigh) - op0_xhigh = expand_binop (word_mode, sub_optab, op0_high, - op0_xhigh, op0_xhigh, 0, - next_methods); - } - - op1_xhigh = expand_binop (word_mode, lshr_optab, op1_low, wordm1, - NULL_RTX, 1, next_methods); - if (op1_xhigh) - op1_xhigh = expand_binop (word_mode, add_optab, op1_high, - op1_xhigh, op1_xhigh, 0, next_methods); - else - { - op1_xhigh = expand_binop (word_mode, ashr_optab, op1_low, wordm1, - NULL_RTX, 0, next_methods); - if (op1_xhigh) - op1_xhigh = expand_binop (word_mode, sub_optab, op1_high, - op1_xhigh, op1_xhigh, 0, - next_methods); - } + product = expand_doubleword_mult (mode, op0, op1, target, + false, methods); + if (!product) + delete_insns_since (last); } - /* If we have been able to directly compute the product of the - low-order words of the operands and perform any required adjustments - of the operands, we proceed by trying two more multiplications - and then computing the appropriate sum. - - We have checked above that the required addition is provided. - Full-word addition will normally always succeed, especially if - it is provided at all, so we don't worry about its failure. The - multiplication may well fail, however, so we do handle that. */ - - if (product && op0_xhigh && op1_xhigh) + if (product != NULL_RTX) { - rtx product_high = operand_subword (product, high, 1, mode); - rtx temp = expand_binop (word_mode, binoptab, op0_low, op1_xhigh, - NULL_RTX, 0, OPTAB_DIRECT); - - if (!REG_P (product_high)) - product_high = force_reg (word_mode, product_high); - - if (temp != 0) - temp = expand_binop (word_mode, add_optab, temp, product_high, - product_high, 0, next_methods); - - if (temp != 0 && temp != product_high) - emit_move_insn (product_high, temp); - - if (temp != 0) - temp = expand_binop (word_mode, binoptab, op1_low, op0_xhigh, - NULL_RTX, 0, OPTAB_DIRECT); - - if (temp != 0) - temp = expand_binop (word_mode, add_optab, temp, - product_high, product_high, - 0, next_methods); - - if (temp != 0 && temp != product_high) - emit_move_insn (product_high, temp); - - emit_move_insn (operand_subword (product, high, 1, mode), product_high); - - if (temp != 0) + if (mov_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) { - if (mov_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing) - { - temp = emit_move_insn (product, product); - set_unique_reg_note (temp, - REG_EQUAL, - gen_rtx_fmt_ee (MULT, mode, - copy_rtx (op0), - copy_rtx (op1))); - } - - return product; + temp = emit_move_insn (target ? target : product, product); + set_unique_reg_note (temp, + REG_EQUAL, + gen_rtx_fmt_ee (MULT, mode, + copy_rtx (op0), + copy_rtx (op1))); } + return product; } - - /* If we get here, we couldn't do it for some reason even though we - originally thought we could. Delete anything we've emitted in - trying to do it. */ - - delete_insns_since (last); } /* It can't be open-coded in this mode. |