author    Richard Henderson <rth@redhat.com>    2005-01-05 22:22:32 -0800
committer Richard Henderson <rth@gcc.gnu.org>   2005-01-05 22:22:32 -0800
commit    e358acde2cc2c540b85ab7c5df397d653032b70d (patch)
tree      c9384bc8107956e150e097f383e98ca029b78464 /gcc
parent    051d8245886f5f133fad42d38b9d4ce12afd1394 (diff)
re PR rtl-optimization/11327 (Non-optimal code when using MMX/SSE builtins)
PR target/11327
* config/i386/i386.c (BUILTIN_DESC_SWAP_OPERANDS): New.
(bdesc_2arg): Use it.
(ix86_expand_binop_builtin): Force operands into registers
when optimizing.
(ix86_expand_unop_builtin, ix86_expand_unop1_builtin,
ix86_expand_sse_compare, ix86_expand_sse_comi,
ix86_expand_builtin): Likewise.

From-SVN: r92988
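For context, a minimal illustration of the user-level code the PR is about. This example is not part of the patch; it assumes GCC's vector extensions and compilation with -msse. SSE encodes only the EQ/LT/LE/UNORDERED comparisons and their negations, so the greater-than builtins are expanded as the reverse comparison with the operands swapped, which is what the new BUILTIN_DESC_SWAP_OPERANDS flag records in bdesc_2arg (previously a bare 0/1 in the flag field).

typedef float v4sf __attribute__ ((vector_size (16)));

/* __builtin_ia32_cmpgtps has no CMPGTPS instruction behind it; the
   expander emits CMPLTPS with a and b swapped, per the bdesc_2arg
   entry now tagged BUILTIN_DESC_SWAP_OPERANDS.  */
v4sf
greater_mask (v4sf a, v4sf b)
{
  return __builtin_ia32_cmpgtps (a, b);
}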
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog            11
-rw-r--r--  gcc/config/i386/i386.c   87
2 files changed, 66 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 48586a2..43e651c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2005-01-05 Richard Henderson <rth@redhat.com>
+ PR target/11327
+ * config/i386/i386.c (BUILTIN_DESC_SWAP_OPERANDS): New.
+ (bdesc_2arg): Use it.
+ (ix86_expand_binop_builtin): Force operands into registers
+ when optimizing.
+ (ix86_expand_unop_builtin, ix86_expand_unop1_builtin,
+ ix86_expand_sse_compare, ix86_expand_sse_comi,
+ ix86_expand_builtin): Likewise.
+
+2005-01-05 Richard Henderson <rth@redhat.com>
+
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
and VEC_SELECT.
* config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ca1f1f7..0031959 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12077,6 +12077,12 @@ do { \
NULL, NULL_TREE); \
} while (0)
+/* Bits for builtin_description.flag. */
+
+/* Set when we don't support the comparison natively, and should
+ swap_comparison in order to support it. */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
struct builtin_description
{
const unsigned int mask;
@@ -12130,14 +12136,18 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
{ MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
{ MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
- { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
- { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
+ { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
{ MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
{ MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
{ MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
{ MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
- { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
- { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
+ { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
{ MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
{ MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
{ MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
@@ -12258,14 +12268,18 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
{ MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
{ MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
- { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
- { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
+ { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
{ MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
{ MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
{ MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
{ MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
- { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
- { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
+ { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT,
+ BUILTIN_DESC_SWAP_OPERANDS },
+ { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE,
+ BUILTIN_DESC_SWAP_OPERANDS },
{ MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
{ MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
{ MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
@@ -13137,7 +13151,7 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
if (VECTOR_MODE_P (mode1))
op1 = safe_vector_operand (op1, mode1);
- if (! target
+ if (optimize || !target
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
@@ -13155,9 +13169,11 @@ ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
|| (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
abort ();
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
/* In the commutative cases, both op0 and op1 are nonimmediate_operand,
@@ -13210,7 +13226,7 @@ ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- if (! target
+ if (optimize || !target
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
@@ -13221,7 +13237,8 @@ ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
}
@@ -13244,7 +13261,7 @@ ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- if (! target
+ if (optimize || !target
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
@@ -13252,7 +13269,8 @@ ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op1 = op0;
@@ -13290,7 +13308,7 @@ ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
/* Swap operands if we have a comparison that isn't available in
hardware. */
- if (d->flag)
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
{
rtx tmp = gen_reg_rtx (mode1);
emit_move_insn (tmp, op1);
@@ -13298,14 +13316,16 @@ ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
op0 = tmp;
}
- if (! target
+ if (optimize || !target
|| GET_MODE (target) != tmode
|| ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
- if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ if ((optimize && !register_operand (op1, mode1))
+ || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
@@ -13339,7 +13359,7 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
/* Swap operands if we have a comparison that isn't available in
hardware. */
- if (d->flag)
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
{
rtx tmp = op1;
op1 = op0;
@@ -13350,9 +13370,11 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
emit_move_insn (target, const0_rtx);
target = gen_rtx_SUBREG (QImode, target, 0);
- if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
@@ -13449,7 +13471,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ if ((optimize && !register_operand (op1, mode1))
+ || ! (*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
{
@@ -13470,7 +13493,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
- ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
+ ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex
+ : CODE_FOR_mmx_maskmovq)
: (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
: CODE_FOR_sse2_maskmovdqu));
/* Note the arg order is different from the operand order. */
@@ -13537,12 +13561,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
mode0 = insn_data[icode].operand[1].mode;
mode1 = insn_data[icode].operand[2].mode;
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
+ op0 = force_reg (mode0, op0);
op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
- if (target == 0
+ if (optimize || target == 0
|| GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ || !register_operand (target, tmode))
target = gen_reg_rtx (tmode);
pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
@@ -13566,8 +13589,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
mode1 = insn_data[icode].operand[1].mode;
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
+ op1 = force_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op1);
if (! pat)
@@ -13610,7 +13632,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
{
@@ -13618,7 +13641,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
error ("mask must be an immediate");
return gen_reg_rtx (tmode);
}
- if (target == 0
+ if (optimize || target == 0
|| GET_MODE (target) != tmode
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
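The register-forcing idiom repeated throughout the hunks above can be summarized by the sketch below. The helper is hypothetical (the patch open-codes this test in each ix86_expand_*_builtin function): when optimizing, an operand that is merely acceptable to the insn predicate, such as a MEM matching nonimmediate_operand, is still copied into a fresh pseudo so later RTL passes can CSE and hoist the load instead of leaving it folded into the SSE instruction.

/* Hypothetical helper, for explanation only; mirrors the pattern the
   patch adds at each call site rather than any function it defines.  */
static rtx
ix86_force_builtin_operand (enum insn_code icode, int opno,
                            rtx op, enum machine_mode mode)
{
  /* When optimizing, insist on a register even if the predicate
     would also accept memory, so the RTL optimizers can reuse it.  */
  if ((optimize && !register_operand (op, mode))
      || !(*insn_data[icode].operand[opno].predicate) (op, mode))
    op = copy_to_mode_reg (mode, op);
  return op;
}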