author    Bernd Schmidt <bernds@redhat.com>   2001-09-28 18:00:35 +0000
committer Bernd Schmidt <bernds@gcc.gnu.org>  2001-09-28 18:00:35 +0000
commit    47f339cf8d8fa88fb209221fd010b2db24470df2 (patch)
tree      ed3a0fd403af3666e62d6260eb780d02efdfc593 /gcc
parent    ad91981212e1f9cf0c9b57a93f85e0a5cf1e096b (diff)
Add support for 3Dnow builtins
From-SVN: r45863
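A hedged usage sketch of the interface this patch exposes: the __v2sf/__v2si typedef spellings below are assumptions and not part of the patch, while the builtin names and their V2SF/V2SI prototypes are the ones registered in ix86_init_mmx_sse_builtins in the diff.

/* Usage sketch only: the typedefs are illustrative assumptions; the
   builtins and their signatures are the ones this patch registers.
   Build with -m3dnow; the Athlon-only builtins (e.g. __builtin_ia32_pswapdsf)
   additionally need the 3DNow!-Athlon extensions that override_options
   enables when targeting the Athlon.  */
typedef float __v2sf __attribute__ ((mode (V2SF)));
typedef int   __v2si __attribute__ ((mode (V2SI)));

static __v2sf
packed_add (__v2sf a, __v2sf b)
{
  return __builtin_ia32_pfadd (a, b);      /* PFADD: two single-float adds */
}

static __v2si
packed_truncate (__v2sf x)
{
  return __builtin_ia32_pf2id (x);         /* PF2ID: packed float -> int32 */
}

/* As with MMX's EMMS, __builtin_ia32_femms () should be issued before
   returning to x87 floating-point code.  */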
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog            |  31
-rw-r--r--  gcc/c-common.c           |   2
-rw-r--r--  gcc/config/i386/i386.c   | 216
-rw-r--r--  gcc/config/i386/i386.h   |  54
-rw-r--r--  gcc/config/i386/i386.md  | 381
-rw-r--r--  gcc/tree.c               |   5
-rw-r--r--  gcc/tree.h               |   2
7 files changed, 648 insertions, 43 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cfedd18..aaaaa30 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,34 @@
+2001-09-25 Bernd Schmidt <bernds@redhat.com>
+
+ Mostly from Graham Stott <grahams@redhat.com>
+ * c-common.c (type_for_mode): Add support for V2SFmode.
+ * tree.c (build_common_tree_nodes_2): Likewise.
+ * tree.h (enum tree_index, global_trees): Likewise.
+ * config/i386/i386.c (x86_3dnow_a): New variable.
+ (override_options): Support 3Dnow extensions.
+ (bdesc_2arg, bdesc_1arg): Some SSE instructions are also part of
+ Athlon's version of 3Dnow.
+ (ix86_init_mmx_sse_builtins): Create 3Dnow builtins.
+ (ix86_expand_builtin): Handle them.
+ (ix86_hard_regno_mode_ok): Support V2SFmode if using 3Dnow.
+ * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, TARGET_3DNOW,
+ TARGET_3DNOW_A): New macros.
+ (TARGET_SWITCHES): Add 3Dnow switches.
+ (VALID_MMX_REG_MODE_3DNOW): New macro.
+ (VECTOR_MODE_SUPPORTED_P): Use it.
+ (enum ix86_builtins): Add entries for 3Dnow builtins.
+ * config/i386/i386.md (movv2sf_internal, movv2sf, pushv2sf, pf2id,
+ pf2iw, addv2sf3, subv2sf3, subrv2sf3, gtv2sf3, gev2sf3, eqv2sf3,
+ pfmaxv2sf3, pfminv2sf3, mulv2sf3, femms, prefetch_3dnow, prefetchw,
+ pfacc, pfnacc, pfpnacc, pi2fw, floatv2si2, pavgusb, pfrcpv2sf2,
+ pfrcpit1v2sf3, pfrcpit2v2sf3, pfrsqrtv2sf2, pfrsqit1v2sf3,
+ pmulhrwv4hi3, pswapdv2si2, pswapdv2sf2): New patterns.
+ (mmx_pmovmskb, mmx_maskmovq, sse_movntdi, umulv4hi3_highpart,
+ mmx_uavgv8qi3, mmx_uavgv4hi3, mmx_psadbw, mmx_pinsrw, mmx_pextrw,
+ mmx_pshufw, umaxv8qi3, smaxv4hi3, uminv8qi3, sminv4hi3, sfence,
+ sfence_insn, prefetch): Make these available if TARGET_SSE or
+ TARGET_3DNOW_A.
+
Fri Sep 28 19:18:40 CEST 2001 Jan Hubicka <jh@suse.cz>
* i386-protos.h (ix86_setup_incoming_varargs, ix86_va_arg,
diff --git a/gcc/c-common.c b/gcc/c-common.c
index 74a5552..190c338 100644
--- a/gcc/c-common.c
+++ b/gcc/c-common.c
@@ -1347,6 +1347,8 @@ type_for_mode (mode, unsignedp)
return V4HI_type_node;
if (mode == TYPE_MODE (V8QI_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
return V8QI_type_node;
+ if (mode == TYPE_MODE (V2SF_type_node) && VECTOR_MODE_SUPPORTED_P (mode))
+ return V2SF_type_node;
#endif
return 0;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 889c428..f8b65da 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -323,6 +323,7 @@ const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
+const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
@@ -988,6 +989,15 @@ override_options ()
if (TARGET_SSE)
target_flags |= MASK_MMX;
+ /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
+ if (TARGET_3DNOW)
+ {
+ target_flags |= MASK_MMX;
+ /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
+ extensions it adds. */
+ if (x86_3dnow_a & (1 << ix86_arch))
+ target_flags |= MASK_3DNOW_A;
+ }
if ((x86_accumulate_outgoing_args & CPUMASK)
&& !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
@@ -10731,15 +10741,15 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
{ MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
- { MASK_SSE, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
- { MASK_SSE, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
- { MASK_SSE, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
{ MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
{ MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
@@ -10748,10 +10758,10 @@ static struct builtin_description bdesc_2arg[] =
{ MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
{ MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
- { MASK_SSE, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
- { MASK_SSE, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
- { MASK_SSE, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
- { MASK_SSE, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
@@ -10794,7 +10804,7 @@ static struct builtin_description bdesc_2arg[] =
static struct builtin_description bdesc_1arg[] =
{
- { MASK_SSE, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
+ { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
@@ -11034,6 +11044,40 @@ ix86_init_mmx_sse_builtins ()
long_long_unsigned_type_node,
endlink)));
+ tree v2si_ftype_v2sf
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ endlink));
+ tree v2sf_ftype_v2si
+ = build_function_type (V2SF_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink));
+ tree v2si_ftype_v2si
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SI_type_node,
+ endlink));
+ tree v2sf_ftype_v2sf
+ = build_function_type (V2SF_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ endlink));
+ tree v2sf_ftype_v2sf_v2sf
+ = build_function_type (V2SF_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ tree_cons (NULL_TREE,
+ V2SF_type_node,
+ endlink)));
+ tree v2si_ftype_v2sf_v2sf
+ = build_function_type (V2SI_type_node,
+ tree_cons (NULL_TREE, V2SF_type_node,
+ tree_cons (NULL_TREE,
+ V2SF_type_node,
+ endlink)));
+
+ tree void_ftype_pchar
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, pchar_type_node,
+ endlink));
+
/* Add all builtins that are more or less simple operations on two
operands. */
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
@@ -11047,9 +11091,6 @@ ix86_init_mmx_sse_builtins ()
continue;
mode = insn_data[d->icode].operand[1].mode;
- if (! TARGET_SSE && ! VALID_MMX_REG_MODE (mode))
- continue;
-
switch (mode)
{
case V4SFmode:
@@ -11121,10 +11162,10 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
- def_builtin (MASK_SSE, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
- def_builtin (MASK_SSE, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
- def_builtin (MASK_SSE, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
@@ -11139,14 +11180,14 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE, "__builtin_ia32_storelps", v4sf_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
- def_builtin (MASK_SSE, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
- def_builtin (MASK_SSE, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
- def_builtin (MASK_SSE, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin (MASK_SSE, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
- def_builtin (MASK_SSE, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
@@ -11157,6 +11198,38 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
+ /* Original 3DNow! */
+ def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
+ def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
+
+ /* 3DNow! extension as used in the Athlon CPU. */
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
+ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
+
/* Composite intrinsics. */
def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
@@ -11179,7 +11252,7 @@ safe_vector_operand (x, mode)
return x;
x = gen_reg_rtx (mode);
- if (VALID_MMX_REG_MODE (mode))
+ if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
@@ -11739,6 +11812,107 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
emit_insn (pat);
return target;
+ case IX86_BUILTIN_FEMMS:
+ emit_insn (gen_femms ());
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PAVGUSB:
+ return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
+
+ case IX86_BUILTIN_PF2ID:
+ return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
+
+ case IX86_BUILTIN_PFACC:
+ return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
+
+ case IX86_BUILTIN_PFADD:
+ return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPEQ:
+ return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGE:
+ return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFCMPGT:
+ return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMAX:
+ return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMIN:
+ return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFMUL:
+ return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCP:
+ return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFRCPIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRCPIT2:
+ return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQIT1:
+ return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFRSQRT:
+ return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
+
+ case IX86_BUILTIN_PFSUB:
+ return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PFSUBR:
+ return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
+
+ case IX86_BUILTIN_PI2FD:
+ return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PMULHRW:
+ return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
+
+ case IX86_BUILTIN_PREFETCH_3DNOW:
+ icode = CODE_FOR_prefetch_3dnow;
+ arg0 = TREE_VALUE (arglist);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+ pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PREFETCHW:
+ icode = CODE_FOR_prefetchw;
+ arg0 = TREE_VALUE (arglist);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+ pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0));
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case IX86_BUILTIN_PF2IW:
+ return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
+
+ case IX86_BUILTIN_PFNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
+
+ case IX86_BUILTIN_PFPNACC:
+ return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
+
+ case IX86_BUILTIN_PI2FW:
+ return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSI:
+ return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
+
+ case IX86_BUILTIN_PSWAPDSF:
+ return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
+
/* Composite intrinsics. */
case IX86_BUILTIN_SETPS1:
target = assign_386_stack_local (SFmode, 0);
@@ -12055,7 +12229,7 @@ ix86_hard_regno_mode_ok (regno, mode)
if (SSE_REGNO_P (regno))
return VALID_SSE_REG_MODE (mode);
if (MMX_REGNO_P (regno))
- return VALID_MMX_REG_MODE (mode);
+ return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
/* We handle both integer and floats in the general purpose registers.
In future we should be able to handle vector modes as well. */
if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
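The i386.c pieces above combine as follows: -m3dnow forces MMX on, targeting the Athlon additionally sets MASK_3DNOW_A, and descriptor entries tagged MASK_SSE | MASK_3DNOW_A therefore register for either feature. A condensed sketch of that logic, under the assumption that def_builtin only registers a builtin whose mask intersects target_flags (the helper name resolve_isa_flags is illustrative; the mask values are the new i386.h definitions):

/* Condensed sketch -- helper name is illustrative, masks and implications
   are taken from the patch.  */
#define MASK_MMX     0x00020000
#define MASK_SSE     0x00040000
#define MASK_3DNOW   0x00100000
#define MASK_3DNOW_A 0x00200000

static unsigned int
resolve_isa_flags (unsigned int flags, int arch_is_athlon)
{
  if (flags & MASK_SSE)
    flags |= MASK_MMX;                 /* SSE already implied MMX.        */
  if (flags & MASK_3DNOW)
    {
      flags |= MASK_MMX;               /* 3DNow! implies MMX as well.     */
      if (arch_is_athlon)
        flags |= MASK_3DNOW_A;         /* Athlon adds the extended set.   */
    }
  return flags;
}

/* A descriptor whose mask is MASK_SSE | MASK_3DNOW_A is then registered
   when either bit ends up set, e.g.:
     if (desc->mask & flags)
       def_builtin (desc->mask, desc->name, ftype, desc->code);  */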
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 654d8fb..fc6f1c9 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -118,10 +118,12 @@ extern int target_flags;
#define MASK_MMX 0x00020000 /* Support MMX regs/builtins */
#define MASK_SSE 0x00040000 /* Support SSE regs/builtins */
#define MASK_SSE2 0x00080000 /* Support SSE2 regs/builtins */
-#define MASK_128BIT_LONG_DOUBLE 0x00100000 /* long double size is 128bit */
-#define MASK_MIX_SSE_I387 0x00200000 /* Mix SSE and i387 instructions */
-#define MASK_64BIT 0x00400000 /* Produce 64bit code */
-#define MASK_NO_RED_ZONE 0x00800000 /* Do not use red zone */
+#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */
+#define MASK_3DNOW_A 0x00200000 /* Support Athlon 3Dnow builtins */
+#define MASK_128BIT_LONG_DOUBLE 0x00400000 /* long double size is 128bit */
+#define MASK_MIX_SSE_I387 0x00800000 /* Mix SSE and i387 instructions */
+#define MASK_64BIT 0x01000000 /* Produce 64bit code */
+#define MASK_NO_RED_ZONE 0x02000000 /* Do not use red zone */
/* Temporary codegen switches */
#define MASK_INTEL_SYNTAX 0x00000200
@@ -264,6 +266,8 @@ extern const int x86_epilogue_using_move;
#define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0)
#define TARGET_MIX_SSE_I387 ((target_flags & MASK_MIX_SSE_I387) != 0)
#define TARGET_MMX ((target_flags & MASK_MMX) != 0)
+#define TARGET_3DNOW ((target_flags & MASK_3DNOW) != 0)
+#define TARGET_3DNOW_A ((target_flags & MASK_3DNOW_A) != 0)
#define TARGET_RED_ZONE (!(target_flags & MASK_NO_RED_ZONE))
@@ -335,6 +339,10 @@ extern const int x86_epilogue_using_move;
{ "mmx", MASK_MMX, N_("Support MMX builtins") }, \
{ "no-mmx", -MASK_MMX, \
N_("Do not support MMX builtins") }, \
+ { "3dnow", MASK_3DNOW, \
+ N_("Support 3DNow! builtins") }, \
+ { "no-3dnow", -MASK_3DNOW, \
+ N_("Do not support 3DNow! builtins") }, \
{ "sse", MASK_SSE, \
N_("Support MMX and SSE builtins and code generation") }, \
{ "no-sse", -MASK_SSE, \
@@ -918,13 +926,17 @@ extern int ix86_arch;
|| (MODE) == SFmode \
|| (TARGET_SSE2 && ((MODE) == DFmode || VALID_MMX_REG_MODE (MODE))))
+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+ ((MODE) == V2SFmode || (MODE) == SFmode)
+
#define VALID_MMX_REG_MODE(MODE) \
((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
|| (MODE) == V2SImode || (MODE) == SImode)
#define VECTOR_MODE_SUPPORTED_P(MODE) \
(VALID_SSE_REG_MODE (MODE) && TARGET_SSE ? 1 \
- : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 : 0)
+ : VALID_MMX_REG_MODE (MODE) && TARGET_MMX ? 1 \
+ : VALID_MMX_REG_MODE_3DNOW (MODE) && TARGET_3DNOW ? 1 : 0)
#define VALID_FP_MODE_P(mode) \
((mode) == SFmode || (mode) == DFmode || (mode) == TFmode \
@@ -2204,6 +2216,38 @@ enum ix86_builtins
IX86_BUILTIN_SFENCE,
IX86_BUILTIN_PREFETCH,
+ /* 3DNow! Original */
+ IX86_BUILTIN_FEMMS,
+ IX86_BUILTIN_PAVGUSB,
+ IX86_BUILTIN_PF2ID,
+ IX86_BUILTIN_PFACC,
+ IX86_BUILTIN_PFADD,
+ IX86_BUILTIN_PFCMPEQ,
+ IX86_BUILTIN_PFCMPGE,
+ IX86_BUILTIN_PFCMPGT,
+ IX86_BUILTIN_PFMAX,
+ IX86_BUILTIN_PFMIN,
+ IX86_BUILTIN_PFMUL,
+ IX86_BUILTIN_PFRCP,
+ IX86_BUILTIN_PFRCPIT1,
+ IX86_BUILTIN_PFRCPIT2,
+ IX86_BUILTIN_PFRSQIT1,
+ IX86_BUILTIN_PFRSQRT,
+ IX86_BUILTIN_PFSUB,
+ IX86_BUILTIN_PFSUBR,
+ IX86_BUILTIN_PI2FD,
+ IX86_BUILTIN_PMULHRW,
+ IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
+ IX86_BUILTIN_PREFETCHW,
+
+ /* 3DNow! Athlon Extensions */
+ IX86_BUILTIN_PF2IW,
+ IX86_BUILTIN_PFNACC,
+ IX86_BUILTIN_PFPNACC,
+ IX86_BUILTIN_PI2FW,
+ IX86_BUILTIN_PSWAPDSI,
+ IX86_BUILTIN_PSWAPDSF,
+
/* Composite builtins, expand to more than one insn. */
IX86_BUILTIN_SETPS1,
IX86_BUILTIN_SETPS,
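With the new masks and macros in place, V2SFmode becomes a supported vector mode whenever -m3dnow is active, which is what lets type_for_mode hand back V2SF_type_node and lets MMX registers hold V2SF values. A hedged restatement of the mode check (the function wrapper is illustrative; the conditions mirror the patched VECTOR_MODE_SUPPORTED_P and VALID_MMX_REG_MODE_3DNOW):

/* Illustrative wrapper -- the conditions restate the patched macros.  */
static int
vector_mode_supported_p (enum machine_mode mode)
{
  if (VALID_SSE_REG_MODE (mode) && TARGET_SSE)
    return 1;
  if (VALID_MMX_REG_MODE (mode) && TARGET_MMX)
    return 1;
  /* New case: V2SFmode (and SFmode) become valid once -m3dnow is enabled.  */
  if (VALID_MMX_REG_MODE_3DNOW (mode) && TARGET_3DNOW)
    return 1;
  return 0;
}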
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5119db6..3b98788 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -92,6 +92,15 @@
;; 43 This is a `rsqsrt' operation.
;; 44 This is a `sfence' operation.
;; 45 This is a noop to prevent excessive combiner cleverness.
+;; 46 This is a `femms' operation.
+;; 47 This is a `prefetch' (3DNow) operation.
+;; 48 This is a `prefetchw' operation.
+;; 49 This is a 'pavgusb' operation.
+;; 50 This is a `pfrcp' operation.
+;; 51 This is a `pfrcpit1' operation.
+;; 52 This is a `pfrcpit2' operation.
+;; 53 This is a `pfrsqrt' operation.
+;; 54 This is a `pfrsqrit1' operation.
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.c.
@@ -17455,6 +17464,13 @@
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
+(define_insn "movv2sf_internal"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
+ (match_operand:V2SF 1 "general_operand" "ym,y"))]
+ "TARGET_3DNOW"
+ "movq\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
(define_expand "movti"
[(set (match_operand:TI 0 "general_operand" "")
(match_operand:TI 1 "general_operand" ""))]
@@ -17641,6 +17657,40 @@
}
})
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "general_operand" "")
+ (match_operand:V2SF 1 "general_operand" ""))]
+ "TARGET_3DNOW"
+ "
+{
+ /* For constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V2SFmode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr,
+ XEXP (force_const_mem (V2SFmode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V2SFmode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V2SFmode)
+ && !register_operand (operands[1], V2SFmode)
+ && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
+ && operands[1] != CONST0_RTX (V2SFmode))
+ {
+ rtx temp = force_reg (V2SFmode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -17707,6 +17757,17 @@
""
[(set_attr "type" "mmx")])
+(define_insn_and_split "*pushv2sf"
+ [(set (match_operand:V2SF 0 "push_operand" "=<")
+ (match_operand:V2SF 1 "nonmemory_operand" "y"))]
+ "TARGET_3DNOW"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (set (mem:V2SF (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "mmx")])
+
(define_insn "movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
(match_operand:TI 1 "general_operand" "xm,x"))]
@@ -17749,7 +17810,7 @@
(define_insn "mmx_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pmovmskb\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
@@ -17757,7 +17818,7 @@
[(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
(match_operand:V8QI 2 "register_operand" "y")] 32))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovq\t{%2, %1|%1, %2}"
[(set_attr "type" "sse")])
@@ -17772,7 +17833,7 @@
(define_insn "sse_movntdi"
[(set (match_operand:DI 0 "memory_operand" "=m")
(unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"movntq\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
@@ -18535,7 +18596,7 @@
(mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
(zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
- "TARGET_MMX"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
[(set_attr "type" "mmx")])
@@ -18628,7 +18689,7 @@
(const_int 1)
(const_int 1)])))
(const_int 1)))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pavgb\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18643,7 +18704,7 @@
(const_int 1)
(const_int 1)])))
(const_int 1)))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pavgw\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18651,7 +18712,7 @@
[(set (match_operand:V8QI 0 "register_operand" "=y")
(abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"psadbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18664,7 +18725,7 @@
(vec_duplicate:V4HI
(truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
(match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pinsrw\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "sse")])
@@ -18673,7 +18734,7 @@
(zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
(parallel
[(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pextrw\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sse")])
@@ -18682,7 +18743,7 @@
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")
(match_operand:SI 3 "immediate_operand" "i")] 41))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pshufw\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "sse")])
@@ -18744,7 +18805,7 @@
[(set (match_operand:V8QI 0 "register_operand" "=y")
(umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pmaxub\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18752,7 +18813,7 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pmaxsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18760,7 +18821,7 @@
[(set (match_operand:V8QI 0 "register_operand" "=y")
(umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pminub\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18768,7 +18829,7 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"pminsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -19031,7 +19092,7 @@
(define_expand "sfence"
[(set (match_dup 0)
(unspec:BLK [(match_dup 0)] 44))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
{
operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
MEM_VOLATILE_P (operands[0]) = 1;
@@ -19040,7 +19101,7 @@
(define_insn "*sfence_insn"
[(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] 44))]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
"sfence"
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
@@ -19048,7 +19109,7 @@
(define_insn "prefetch"
[(unspec [(match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "immediate_operand" "n")] 35)]
- "TARGET_SSE"
+ "TARGET_SSE || TARGET_3DNOW_A"
{
switch (INTVAL (operands[1]))
{
@@ -19126,3 +19187,289 @@
(set_attr "memory" "store")
(set_attr "modrm" "0")
(set_attr "mode" "DI")])
+
+;; 3Dnow! instructions
+
+(define_insn "addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfadd\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfsub\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "subrv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_3DNOW"
+ "pfsubr\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "gtv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpgt\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "gev2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpge\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpeq\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfmaxv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfmax\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfminv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfmin\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfmul\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "femms"
+ [(unspec_volatile [(const_int 0)] 46)
+ (clobber (reg:XF 8))
+ (clobber (reg:XF 9))
+ (clobber (reg:XF 10))
+ (clobber (reg:XF 11))
+ (clobber (reg:XF 12))
+ (clobber (reg:XF 13))
+ (clobber (reg:XF 14))
+ (clobber (reg:XF 15))
+ (clobber (reg:DI 29))
+ (clobber (reg:DI 30))
+ (clobber (reg:DI 31))
+ (clobber (reg:DI 32))
+ (clobber (reg:DI 33))
+ (clobber (reg:DI 34))
+ (clobber (reg:DI 35))
+ (clobber (reg:DI 36))]
+ "TARGET_3DNOW"
+ "femms"
+ [(set_attr "type" "mmx")])
+
+(define_insn "prefetch_3dnow"
+ [(unspec [(match_operand:SI 0 "address_operand" "p")] 47)]
+ "TARGET_3DNOW"
+ "prefetch\\t%a0"
+ [(set_attr "type" "mmx")])
+
+(define_insn "prefetchw"
+ [(unspec [(match_operand:SI 0 "address_operand" "p")] 48)]
+ "TARGET_3DNOW"
+ "prefetchw\\t%a0"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pf2id"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf2id\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pf2iw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (ss_truncate:V2HI
+ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pf2iw\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfacc"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW"
+ "pfacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfnacc"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A"
+ "pfnacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfpnacc"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1)
+ (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2)
+ (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A"
+ "pfpnacc\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pi2fw"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF
+ (vec_concat:V2SI
+ (sign_extend:SI
+ (truncate:HI
+ (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))))
+ (sign_extend:SI
+ (truncate:HI
+ (vec_select:SI (match_dup 1)
+ (parallel [(const_int 1)])))))))]
+ "TARGET_3DNOW_A"
+ "pi2fw\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "floatv2si2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pi2fd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+;; This insn is identical to pavgb in operation, but the opcode is
+;; different. To avoid accidentally matching pavgb, use an unspec.
+
+(define_insn "pavgusb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI
+ [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))]
+ "TARGET_3DNOW"
+ "pavgusb\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+;; 3DNow reciprocal and sqrt
+
+(define_insn "pfrcpv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))]
+ "TARGET_3DNOW"
+ "pfrcp\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfrcpit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))]
+ "TARGET_3DNOW"
+ "pfrcpit1\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfrcpit2v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))]
+ "TARGET_3DNOW"
+ "pfrcpit2\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfrsqrtv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))]
+ "TARGET_3DNOW"
+ "pfrsqrt\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pfrsqit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))]
+ "TARGET_3DNOW"
+ "pfrsqit1\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (vec_const:V4SI
+ (parallel [(const_int 0x8000)
+ (const_int 0x8000)
+ (const_int 0x8000)
+ (const_int 0x8000)])))
+ (const_int 16))))]
+ "TARGET_3DNOW"
+ "pmulhrw\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pswapdv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "pswapdv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
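Two of the new patterns encode non-obvious arithmetic directly in RTL: pmulhrwv4hi3 is a signed 16-bit multiply that keeps the rounded high half, and pavgusb (kept as an unspec so it is not accidentally matched as SSE pavgb) is a rounded unsigned byte average. A per-element reference sketch, assuming the semantics spelled out in the RTL above (the helper functions are illustrative; the real instructions operate on whole MMX registers):

/* Per-element reference sketch of the 3DNow! arithmetic above.  */
#include <stdint.h>

/* pmulhrw: ((a * b) + 0x8000) >> 16 on signed 16-bit elements.  */
static int16_t
pmulhrw_element (int16_t a, int16_t b)
{
  int32_t prod = (int32_t) a * (int32_t) b;
  return (int16_t) ((prod + 0x8000) >> 16);
}

/* pavgusb: (a + b + 1) >> 1 on unsigned 8-bit elements; same result as
   SSE pavgb, only the opcode differs, hence the unspec pattern.  */
static uint8_t
pavgusb_element (uint8_t a, uint8_t b)
{
  return (uint8_t) (((unsigned) a + (unsigned) b + 1) >> 1);
}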
diff --git a/gcc/tree.c b/gcc/tree.c
index 8a8d22d..62b0760 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -4877,4 +4877,9 @@ build_common_tree_nodes_2 (short_double)
TREE_TYPE (V8QI_type_node) = intQI_type_node;
TYPE_MODE (V8QI_type_node) = V8QImode;
finish_vector_type (V8QI_type_node);
+
+ V2SF_type_node = make_node (VECTOR_TYPE);
+ TREE_TYPE (V2SF_type_node) = float_type_node;
+ TYPE_MODE (V2SF_type_node) = V2SFmode;
+ finish_vector_type (V2SF_type_node);
}
diff --git a/gcc/tree.h b/gcc/tree.h
index b95a4aa..d4306a8 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1846,6 +1846,7 @@ enum tree_index
TI_V8QI_TYPE,
TI_V4HI_TYPE,
TI_V2SI_TYPE,
+ TI_V2SF_TYPE,
TI_MAIN_IDENTIFIER,
@@ -1911,6 +1912,7 @@ extern tree global_trees[TI_MAX];
#define V8QI_type_node global_trees[TI_V8QI_TYPE]
#define V4HI_type_node global_trees[TI_V4HI_TYPE]
#define V2SI_type_node global_trees[TI_V2SI_TYPE]
+#define V2SF_type_node global_trees[TI_V2SF_TYPE]
/* An enumeration of the standard C integer types. These must be
ordered so that shorter types appear before longer ones. */