diff options
author | Richard Henderson <rth@redhat.com> | 2005-01-05 22:22:32 -0800 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2005-01-05 22:22:32 -0800 |
commit | e358acde2cc2c540b85ab7c5df397d653032b70d (patch) | |
tree | c9384bc8107956e150e097f383e98ca029b78464 /gcc | |
parent | 051d8245886f5f133fad42d38b9d4ce12afd1394 (diff) | |
download | gcc-e358acde2cc2c540b85ab7c5df397d653032b70d.zip gcc-e358acde2cc2c540b85ab7c5df397d653032b70d.tar.gz gcc-e358acde2cc2c540b85ab7c5df397d653032b70d.tar.bz2 |
re PR rtl-optimization/11327 (Non-optimal code when using MMX/SSE builtins)
PR target/11327
* config/i386/i386.c (BUILTIN_DESC_SWAP_OPERANDS): New.
(bdesc_2arg): Use it.
(ix86_expand_binop_builtin): Force operands into registers
when optimizing.
(ix86_expand_unop_builtin, ix86_expand_unop1_builtin,
ix86_expand_sse_compare, ix86_expand_sse_comi,
ix86_expand_builtin): Likewise.
From-SVN: r92988
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 87 |
2 files changed, 66 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 48586a2..43e651c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,16 @@ 2005-01-05 Richard Henderson <rth@redhat.com> + PR target/11327 + * config/i386/i386.c (BUILTIN_DESC_SWAP_OPERANDS): New. + (bdesc_2arg): Use it. + (ix86_expand_binop_builtin): Force operands into registers + when optimizing. + (ix86_expand_unop_builtin, ix86_expand_unop1_builtin, + ix86_expand_sse_compare, ix86_expand_sse_comi, + ix86_expand_builtin): Likewise. + +2005-01-05 Richard Henderson <rth@redhat.com> + * config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT. * config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl]. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ca1f1f7..0031959 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12077,6 +12077,12 @@ do { \ NULL, NULL_TREE); \ } while (0) +/* Bits for builtin_description.flag. */ + +/* Set when we don't support the comparison natively, and should + swap_comparison in order to support it. */ +#define BUILTIN_DESC_SWAP_OPERANDS 1 + struct builtin_description { const unsigned int mask; @@ -12130,14 +12136,18 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 }, { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 }, { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, + BUILTIN_DESC_SWAP_OPERANDS }, { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 }, { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 }, { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 }, { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 }, - { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, + BUILTIN_DESC_SWAP_OPERANDS }, { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 }, { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 }, { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 }, @@ -12258,14 +12268,18 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 }, { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 }, { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 }, - { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, + BUILTIN_DESC_SWAP_OPERANDS }, { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 }, { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 }, { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 }, { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 }, - { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, + BUILTIN_DESC_SWAP_OPERANDS }, + { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, + BUILTIN_DESC_SWAP_OPERANDS }, { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 }, { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 }, { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 }, @@ -13137,7 +13151,7 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) if (VECTOR_MODE_P (mode1)) op1 = safe_vector_operand (op1, mode1); - if (! target + if (optimize || !target || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); @@ -13155,9 +13169,11 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)) abort (); - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + if ((optimize && !register_operand (op0, mode0)) + || !(*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + if ((optimize && !register_operand (op1, mode1)) + || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); /* In the commutative cases, both op0 and op1 are nonimmediate_operand, @@ -13210,7 +13226,7 @@ ix86_expand_unop_builtin (enum insn_code icode, tree arglist, enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; - if (! target + if (optimize || !target || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); @@ -13221,7 +13237,8 @@ ix86_expand_unop_builtin (enum insn_code icode, tree arglist, if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + if ((optimize && !register_operand (op0, mode0)) + || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); } @@ -13244,7 +13261,7 @@ ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; - if (! target + if (optimize || !target || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); @@ -13252,7 +13269,8 @@ ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target) if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + if ((optimize && !register_operand (op0, mode0)) + || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); op1 = op0; @@ -13290,7 +13308,7 @@ ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, /* Swap operands if we have a comparison that isn't available in hardware. */ - if (d->flag) + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) { rtx tmp = gen_reg_rtx (mode1); emit_move_insn (tmp, op1); @@ -13298,14 +13316,16 @@ ix86_expand_sse_compare (const struct builtin_description *d, tree arglist, op0 = tmp; } - if (! target + if (optimize || !target || GET_MODE (target) != tmode || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); - if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) + if ((optimize && !register_operand (op0, mode0)) + || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) + if ((optimize && !register_operand (op1, mode1)) + || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); @@ -13339,7 +13359,7 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, /* Swap operands if we have a comparison that isn't available in hardware. */ - if (d->flag) + if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) { rtx tmp = op1; op1 = op0; @@ -13350,9 +13370,11 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree arglist, emit_move_insn (target, const0_rtx); target = gen_rtx_SUBREG (QImode, target, 0); - if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) + if ((optimize && !register_operand (op0, mode0)) + || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) + if ((optimize && !register_operand (op1, mode1)) + || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); @@ -13449,7 +13471,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + if ((optimize && !register_operand (op1, mode1)) + || ! (*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) { @@ -13470,7 +13493,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_MASKMOVQ: case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ - ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) + ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex + : CODE_FOR_mmx_maskmovq) : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64 : CODE_FOR_sse2_maskmovdqu)); /* Note the arg order is different from the operand order. */ @@ -13537,12 +13561,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, mode0 = insn_data[icode].operand[1].mode; mode1 = insn_data[icode].operand[2].mode; - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); + op0 = force_reg (mode0, op0); op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1)); - if (target == 0 + if (optimize || target == 0 || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + || !register_operand (target, tmode)) target = gen_reg_rtx (tmode); pat = GEN_FCN (icode) (target, op0, op1); if (! pat) @@ -13566,8 +13589,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, mode1 = insn_data[icode].operand[1].mode; op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); + op1 = force_reg (mode1, op1); pat = GEN_FCN (icode) (op0, op1); if (! pat) @@ -13610,7 +13632,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + if ((optimize && !register_operand (op1, mode1)) + || !(*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) { @@ -13618,7 +13641,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, error ("mask must be an immediate"); return gen_reg_rtx (tmode); } - if (target == 0 + if (optimize || target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); |