aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog22
-rw-r--r--gcc/config/i386/i386-protos.h2
-rw-r--r--gcc/config/i386/i386.c281
-rw-r--r--gcc/config/i386/i386.h16
-rw-r--r--gcc/config/i386/i386.md562
-rw-r--r--gcc/config/i386/xmmintrin.h10
6 files changed, 335 insertions, 558 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d5e4eb9..93ceae3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2002-01-12 Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386.c (override_options): If SSE, enable sse prefetch.
+ (ix86_expand_vector_move): New.
+ (bdesc_2arg): Remove andps, andnps, orps, xorps.
+ (ix86_init_mmx_sse_builtins): Make static. Remove composite builtins.
+ Remove old prefetch builtins. Special case the logicals removed above.
+ (ix86_expand_builtin): Likewise.
+ (safe_vector_operand): Use V4SFmode, not TImode.
+ (ix86_expand_store_builtin): Remove shuffle arg. Update callers.
+ (ix86_expand_timode_binop_builtin): New.
+ * config/i386/i386-protos.h: Update.
+ * config/i386/i386.h (enum ix86_builtins): Update.
+ * config/i386/i386.md: Correct predicates on MMX/SSE patterns.
+ Use ix86_expand_vector_move in vector move expanders.
+ (movti_internal, movti_rex64): Add xorps alternative.
+ (sse_clrv4sf): Rename and adjust from sse_clrti.
+ (prefetch): Don't work so hard.
+ (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
+ * config/i386/xmmintrin.h (__m128): Use V4SFmode.
+ (_mm_getcsr, _mm_setcsr): Fix typo in builtin name.
+
2002-01-11 Richard Henderson <rth@redhat.com>
* config/i386/mmintrin.h: New file.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 42a8f4a..01c4d44 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -108,6 +108,7 @@ extern rtx i386_simplify_dwarf_addr PARAMS ((rtx));
extern void ix86_expand_clear PARAMS ((rtx));
extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[]));
+extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[]));
extern void ix86_expand_binary_operator PARAMS ((enum rtx_code,
enum machine_mode, rtx[]));
extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode,
@@ -177,7 +178,6 @@ extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
tree, int));
extern rtx ix86_function_value PARAMS ((tree));
extern void ix86_init_builtins PARAMS ((void));
-extern void ix86_init_mmx_sse_builtins PARAMS ((void));
extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
#endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8eebf5f..08c9ca6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -684,6 +684,7 @@ static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
+static void ix86_init_mmx_sse_builtins PARAMS ((void));
struct ix86_address
{
@@ -701,7 +702,9 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
-static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
+static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
+ tree, rtx));
+static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
@@ -1164,7 +1167,10 @@ override_options ()
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
- target_flags |= MASK_MMX;
+ {
+ target_flags |= MASK_MMX;
+ x86_prefetch_sse = true;
+ }
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
if (TARGET_3DNOW)
@@ -6661,6 +6667,38 @@ ix86_expand_move (mode, operands)
emit_insn (insn);
}
+void
+ix86_expand_vector_move (mode, operands)
+ enum machine_mode mode;
+ rtx operands[];
+{
+ /* Force constants other than zero into memory. We do not know how
+ the instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], mode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+ emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (mode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], mode)
+ && !register_operand (operands[1], mode)
+ && operands[1] != CONST0_RTX (mode))
+ {
+ rtx temp = force_reg (TImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+}
+
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine, then just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
@@ -10748,11 +10786,6 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
{ MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
-
{ MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
@@ -10865,7 +10898,7 @@ ix86_init_builtins ()
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
-void
+static void
ix86_init_mmx_sse_builtins ()
{
const struct builtin_description * d;
@@ -10899,14 +10932,6 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, V8QI_type_node,
endlink));
- tree int_ftype_v2si
- = build_function_type (integer_type_node,
- tree_cons (NULL_TREE, V2SI_type_node,
- endlink));
- tree v2si_ftype_int
- = build_function_type (V2SI_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink));
tree v4sf_ftype_v4sf_int
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
@@ -10976,11 +11001,6 @@ ix86_init_mmx_sse_builtins ()
endlink)));
tree void_ftype_void
= build_function_type (void_type_node, endlink);
- tree void_ftype_pchar_int
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink)));
tree void_ftype_unsigned
= build_function_type (void_type_node,
tree_cons (NULL_TREE, unsigned_type_node,
@@ -10989,8 +11009,8 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (unsigned_type_node, endlink);
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
- tree ti_ftype_void
- = build_function_type (intTI_type_node, endlink);
+ tree v4sf_ftype_void
+ = build_function_type (V4SF_type_node, endlink);
tree v2si_ftype_v4sf
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
@@ -11007,19 +11027,6 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, pfloat_type_node,
endlink));
- tree v4sf_ftype_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- endlink));
- tree v4sf_ftype_float_float_float_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- endlink)))));
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type (V4SF_type_node,
@@ -11069,11 +11076,6 @@ ix86_init_mmx_sse_builtins ()
tree_cons (NULL_TREE, V2SI_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
endlink)));
- tree ti_ftype_ti_ti
- = build_function_type (intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- endlink)));
tree di_ftype_di_di
= build_function_type (long_long_unsigned_type_node,
tree_cons (NULL_TREE, long_long_unsigned_type_node,
@@ -11110,11 +11112,6 @@ ix86_init_mmx_sse_builtins ()
V2SF_type_node,
endlink)));
- tree void_ftype_pchar
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- endlink));
-
/* Add all builtins that are more or less simple operations on two
operands. */
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
@@ -11142,9 +11139,6 @@ ix86_init_mmx_sse_builtins ()
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case TImode:
- type = ti_ftype_ti_ti;
- break;
case DImode:
type = di_ftype_di_di;
break;
@@ -11164,8 +11158,6 @@ ix86_init_mmx_sse_builtins ()
}
/* Add the remaining MMX insns with somewhat more complicated types. */
- def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
- def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
@@ -11199,6 +11191,11 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
+
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
@@ -11222,7 +11219,6 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
@@ -11256,8 +11252,6 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
/* 3DNow! extension as used in the Athlon CPU. */
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
@@ -11267,14 +11261,7 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
- /* Composite intrinsics. */
- def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
- def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
- def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
- def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
@@ -11293,8 +11280,8 @@ safe_vector_operand (x, mode)
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
- emit_insn (gen_sse_clrti (mode == TImode ? x
- : gen_rtx_SUBREG (TImode, x, 0)));
+ emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
+ : gen_rtx_SUBREG (V4SFmode, x, 0)));
return x;
}
@@ -11342,13 +11329,45 @@ ix86_expand_binop_builtin (icode, arglist, target)
return target;
}
+/* In type_for_mode we restrict the ability to create TImode types
+ to hosts with 64-bit H_W_I. So we've defined the SSE logicals
+ to have a V4SFmode signature. Convert them in-place to TImode. */
+
+static rtx
+ix86_expand_timode_binop_builtin (icode, arglist, target)
+ enum insn_code icode;
+ tree arglist;
+ rtx target;
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ op0 = gen_lowpart (TImode, op0);
+ op1 = gen_lowpart (TImode, op1);
+ target = gen_reg_rtx (TImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
+ op0 = copy_to_mode_reg (TImode, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = copy_to_mode_reg (TImode, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return gen_lowpart (V4SFmode, target);
+}
+
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
-ix86_expand_store_builtin (icode, arglist, shuffle)
+ix86_expand_store_builtin (icode, arglist)
enum insn_code icode;
tree arglist;
- int shuffle;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
@@ -11362,10 +11381,6 @@ ix86_expand_store_builtin (icode, arglist, shuffle)
op1 = safe_vector_operand (op1, mode1);
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (shuffle >= 0)
- emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
pat = GEN_FCN (icode) (op0, op1);
if (pat)
emit_insn (pat);
@@ -11568,7 +11583,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
- tree arg0, arg1, arg2, arg3;
+ tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
@@ -11583,19 +11598,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
emit_insn (gen_sfence ());
return 0;
- case IX86_BUILTIN_M_FROM_INT:
- target = gen_reg_rtx (DImode);
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
- return target;
-
- case IX86_BUILTIN_M_TO_INT:
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- op0 = copy_to_mode_reg (DImode, op0);
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
- return target;
-
case IX86_BUILTIN_PEXTRW:
icode = CODE_FOR_mmx_pextrw;
arg0 = TREE_VALUE (arglist);
@@ -11689,6 +11691,19 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
+ case IX86_BUILTIN_ANDPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
+ arglist, target);
+ case IX86_BUILTIN_ANDNPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
+ arglist, target);
+ case IX86_BUILTIN_ORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
+ arglist, target);
+ case IX86_BUILTIN_XORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
+ arglist, target);
+
case IX86_BUILTIN_LOADAPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
@@ -11696,15 +11711,15 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
case IX86_BUILTIN_STOREAPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
case IX86_BUILTIN_STOREUPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
case IX86_BUILTIN_LOADSS:
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
case IX86_BUILTIN_STORESS:
- return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
case IX86_BUILTIN_LOADHPS:
case IX86_BUILTIN_LOADLPS:
@@ -11753,9 +11768,9 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
return 0;
case IX86_BUILTIN_MOVNTPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
case IX86_BUILTIN_MOVNTQ:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
@@ -11769,29 +11784,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
emit_insn (gen_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
- case IX86_BUILTIN_PREFETCH:
- icode = CODE_FOR_prefetch_sse;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return const0_rtx;
- }
-
- op0 = copy_to_mode_reg (Pmode, op0);
- pat = GEN_FCN (icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
case IX86_BUILTIN_SHUFPS:
icode = CODE_FOR_sse_shufps;
arg0 = TREE_VALUE (arglist);
@@ -11914,19 +11906,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_PMULHRW:
return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
- case IX86_BUILTIN_PREFETCH_3DNOW:
- case IX86_BUILTIN_PREFETCHW:
- icode = CODE_FOR_prefetch_3dnow;
- arg0 = TREE_VALUE (arglist);
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
- mode0 = insn_data[icode].operand[0].mode;
- pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return NULL_RTX;
-
case IX86_BUILTIN_PF2IW:
return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
@@ -11945,57 +11924,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_PSWAPDSF:
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
- /* Composite intrinsics. */
- case IX86_BUILTIN_SETPS1:
- target = assign_386_stack_local (SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
- emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
- return op0;
-
- case IX86_BUILTIN_SETPS:
- target = assign_386_stack_local (V4SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 4),
- expand_expr (arg1, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 8),
- expand_expr (arg2, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 12),
- expand_expr (arg3, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_movaps (op0, target));
- return op0;
-
- case IX86_BUILTIN_CLRPS:
- target = gen_reg_rtx (TImode);
- emit_insn (gen_sse_clrti (target));
- return target;
-
- case IX86_BUILTIN_LOADRPS:
- target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
- gen_reg_rtx (V4SFmode), 1);
- emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
+ case IX86_BUILTIN_SSE_ZERO:
+ target = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_clrv4sf (target));
return target;
- case IX86_BUILTIN_LOADPS1:
- target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
- gen_reg_rtx (V4SFmode), 1);
- emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
- return target;
-
- case IX86_BUILTIN_STOREPS1:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
- case IX86_BUILTIN_STORERPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
-
case IX86_BUILTIN_MMX_ZERO:
target = gen_reg_rtx (DImode);
emit_insn (gen_mmx_clrdi (target));
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f7f569d..8f8e208 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2089,8 +2089,6 @@ enum ix86_builtins
IX86_BUILTIN_CVTSS2SI,
IX86_BUILTIN_CVTTPS2PI,
IX86_BUILTIN_CVTTSS2SI,
- IX86_BUILTIN_M_FROM_INT,
- IX86_BUILTIN_M_TO_INT,
IX86_BUILTIN_MAXPS,
IX86_BUILTIN_MAXSS,
@@ -2215,7 +2213,6 @@ enum ix86_builtins
IX86_BUILTIN_LDMXCSR,
IX86_BUILTIN_STMXCSR,
IX86_BUILTIN_SFENCE,
- IX86_BUILTIN_PREFETCH,
/* 3DNow! Original */
IX86_BUILTIN_FEMMS,
@@ -2238,8 +2235,6 @@ enum ix86_builtins
IX86_BUILTIN_PFSUBR,
IX86_BUILTIN_PI2FD,
IX86_BUILTIN_PMULHRW,
- IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
- IX86_BUILTIN_PREFETCHW,
/* 3DNow! Athlon Extensions */
IX86_BUILTIN_PF2IW,
@@ -2249,16 +2244,7 @@ enum ix86_builtins
IX86_BUILTIN_PSWAPDSI,
IX86_BUILTIN_PSWAPDSF,
- /* Composite builtins, expand to more than one insn. */
- IX86_BUILTIN_SETPS1,
- IX86_BUILTIN_SETPS,
- IX86_BUILTIN_CLRPS,
- IX86_BUILTIN_SETRPS,
- IX86_BUILTIN_LOADPS1,
- IX86_BUILTIN_LOADRPS,
- IX86_BUILTIN_STOREPS1,
- IX86_BUILTIN_STORERPS,
-
+ IX86_BUILTIN_SSE_ZERO,
IX86_BUILTIN_MMX_ZERO,
IX86_BUILTIN_MAX
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4673c2b..c892fc1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -81,7 +81,6 @@
;; 32 This is a `maskmov' operation.
;; 33 This is a `movmsk' operation.
;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' (SSE) operation.
;; 36 This is used to distinguish COMISS from UCOMISS.
;; 37 This is a `ldmxcsr' operation.
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -17686,7 +17685,7 @@
(define_insn "movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SF 1 "general_operand" "xm,x"))]
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17694,7 +17693,7 @@
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SI 1 "general_operand" "xm,x"))]
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17702,28 +17701,28 @@
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V8QI 1 "general_operand" "ym,y"))]
+ (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv4hi_internal"
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V4HI 1 "general_operand" "ym,y"))]
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2si_internal"
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SI 1 "general_operand" "ym,y"))]
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SF 1 "general_operand" "ym,y"))]
+ (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
"TARGET_3DNOW"
"movq\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
@@ -17734,34 +17733,10 @@
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
- {
- ix86_expand_move (TImode, operands);
- DONE;
- }
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], TImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (TImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], TImode)
- && !register_operand (operands[1], TImode)
- && operands[1] != CONST0_RTX (TImode))
- {
- rtx temp = force_reg (TImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_move (TImode, operands);
+ else
+ ix86_expand_vector_move (TImode, operands);
+ DONE;
})
(define_expand "movv4sf"
@@ -17769,30 +17744,8 @@
(match_operand:V4SF 1 "general_operand" ""))]
"TARGET_SSE"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4SFmode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4SFmode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4SFmode)
- && !register_operand (operands[1], V4SFmode)
- && operands[1] != CONST0_RTX (V4SFmode))
- {
- rtx temp = force_reg (V4SFmode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4SFmode, operands);
+ DONE;
})
(define_expand "movv4si"
@@ -17800,30 +17753,8 @@
(match_operand:V4SI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4SImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4SImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4SImode)
- && !register_operand (operands[1], V4SImode)
- && operands[1] != CONST0_RTX (V4SImode))
- {
- rtx temp = force_reg (V4SImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4SImode, operands);
+ DONE;
})
(define_expand "movv2si"
@@ -17831,30 +17762,8 @@
(match_operand:V2SI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V2SImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V2SImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V2SImode)
- && !register_operand (operands[1], V2SImode)
- && operands[1] != CONST0_RTX (V2SImode))
- {
- rtx temp = force_reg (V2SImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V2SImode, operands);
+ DONE;
})
(define_expand "movv4hi"
@@ -17862,30 +17771,8 @@
(match_operand:V4HI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4HImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4HImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4HImode)
- && !register_operand (operands[1], V4HImode)
- && operands[1] != CONST0_RTX (V4HImode))
- {
- rtx temp = force_reg (V4HImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4HImode, operands);
+ DONE;
})
(define_expand "movv8qi"
@@ -17893,65 +17780,18 @@
(match_operand:V8QI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V8QImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V8QImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V8QImode)
- && !register_operand (operands[1], V8QImode)
- && operands[1] != CONST0_RTX (V8QImode))
- {
- rtx temp = force_reg (V8QImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V8QImode, operands);
+ DONE;
})
(define_expand "movv2sf"
[(set (match_operand:V2SF 0 "general_operand" "")
(match_operand:V2SF 1 "general_operand" ""))]
"TARGET_3DNOW"
- "
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V2SFmode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr,
- XEXP (force_const_mem (V2SFmode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V2SFmode, addr);
- }
-
- /* Make operand1 a register is it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V2SFmode)
- && !register_operand (operands[1], V2SFmode)
- && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
- && operands[1] != CONST0_RTX (V2SFmode))
- {
- rtx temp = force_reg (V2SFmode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
-}")
+ ix86_expand_vector_move (V2SFmode, operands);
+ DONE;
+})
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
@@ -18031,25 +17871,27 @@
[(set_attr "type" "mmx")])
(define_insn "movti_internal"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
- (match_operand:TI 1 "general_operand" "xm,x"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
"@
+ xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "*movti_rex64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
+ (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
#
#
+ xorps\t%0, %0
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
- [(set_attr "type" "*,*,sse,sse")
+ [(set_attr "type" "*,*,sse,sse,sse")
(set_attr "mode" "TI")])
(define_split
@@ -18064,7 +17906,8 @@
;; movaps or movups
(define_insn "sse_movaps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
"TARGET_SSE"
"@
movaps\t{%1, %0|%0, %1}
@@ -18073,7 +17916,8 @@
(define_insn "sse_movups"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
"TARGET_SSE"
"@
movups\t{%1, %0|%0, %1}
@@ -18154,7 +17998,8 @@
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
- "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18164,7 +18009,8 @@
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
- "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18220,10 +18066,11 @@
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18231,17 +18078,18 @@
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18256,10 +18104,11 @@
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18274,10 +18123,11 @@
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18287,53 +18137,57 @@
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
-
;; SSE logical operations.
;; These are not called andti3 etc. because we really really don't want
@@ -18519,9 +18373,9 @@
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
-(define_insn "sse_clrti"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (unspec:TI [(const_int 0)] 45))]
+(define_insn "sse_clrv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(const_int 0)] 45))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sse")
@@ -18532,8 +18386,8 @@
(define_insn "maskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18542,24 +18396,23 @@
[(set (match_operand:V4SI 0 "register_operand" "=x")
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")])))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
- return \"cmpordps\t{%2, %0|%0, %2}\";
-
- return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
-}"
+ return "cmpordps\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3ps\t{%2, %0|%0, %2}";
+}
[(set_attr "type" "sse")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")])
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
@@ -18571,18 +18424,17 @@
(vec_merge:V4SI
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")]))
(subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
- return \"cmpordss\t{%2, %0|%0, %2}\";
-
- return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
-}"
+ return "cmpordss\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3ss\t{%2, %0|%0, %2}";
+}
[(set_attr "type" "sse")])
(define_insn "sse_comi"
@@ -18663,10 +18515,11 @@
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18681,10 +18534,11 @@
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18694,56 +18548,58 @@
(define_insn "cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (vec_duplicate:V4SF
- (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
- (const_int 12)))]
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
- (parallel
- [(const_int 0)
- (const_int 1)])))]
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
- (parallel
- [(const_int 0)
- (const_int 1)])))]
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (vec_duplicate:V4SF
- (float:SF (match_operand:SI 2 "register_operand" "rm")))
- (const_int 14)))]
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
- (parallel [(const_int 0)])))]
+ (vec_select:SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
- (parallel [(const_int 0)])))]
+ (vec_select:SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
@@ -18877,8 +18733,10 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
- (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
- (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (mult:V4SI (sign_extend:V4SI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
@@ -18888,8 +18746,10 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
- (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
- (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (mult:V4SI (zero_extend:V4SI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
@@ -18899,12 +18759,12 @@
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI
(mult:V2SI
- (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 1)
@@ -19404,75 +19264,6 @@
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
-(define_expand "prefetch"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (match_operand:SI 2 "const_int_operand" "n"))]
- "TARGET_PREFETCH_SSE || TARGET_3DNOW"
- "
-{
- int rw = INTVAL (operands[1]);
- int locality = INTVAL (operands[2]);
- if (rw != 0 && rw != 1)
- abort ();
- if (locality < 0 || locality > 3)
- abort ();
- /* Use 3dNOW prefetch in case we are asking for write prefetch not
- suported by SSE counterpart or the SSE prefetch is not available
- (K6 machines). Otherwise use SSE prefetch as it allows specifying
- of locality. */
- if (TARGET_3DNOW
- && (!TARGET_PREFETCH_SSE || rw))
- {
- emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
- }
- else
- {
- int i;
- switch (locality)
- {
- case 0: /* No temporal locality. */
- i = 0;
- break;
- case 1: /* Lowest level of temporal locality. */
- i = 3;
- break;
- case 2: /* Moderate level of temporal locality. */
- i = 2;
- break;
- case 3: /* Highest level of temporal locality. */
- i = 1;
- break;
- default:
- abort (); /* We already checked for valid values above. */
- break;
- }
- emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
- }
- DONE;
-}")
-
-(define_insn "prefetch_sse"
- [(unspec [(match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "immediate_operand" "n")] 35)]
- "TARGET_PREFETCH_SSE"
-{
- switch (INTVAL (operands[1]))
- {
- case 0:
- return "prefetchnta\t%a0";
- case 1:
- return "prefetcht0\t%a0";
- case 2:
- return "prefetcht1\t%a0";
- case 3:
- return "prefetcht2\t%a0";
- default:
- abort ();
- }
-}
- [(set_attr "type" "sse")])
-
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI 21)
@@ -19630,19 +19421,6 @@
"femms"
[(set_attr "type" "mmx")])
-(define_insn "prefetch_3dnow"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (const_int 0))]
- "TARGET_3DNOW"
-{
- if (INTVAL (operands[1]) == 0)
- return "prefetch\t%a0";
- else
- return "prefetchw\t%a0";
-}
- [(set_attr "type" "mmx")])
-
(define_insn "pf2id"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
@@ -19820,3 +19598,61 @@
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
+
+(define_expand "prefetch"
+ [(prefetch (match_operand:SI 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+ if (rw != 0 && rw != 1)
+ abort ();
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ /* Use 3dNOW prefetch in case we are asking for write prefetch not
+ supported by SSE counterpart or the SSE prefetch is not available
+ (K6 machines). Otherwise use SSE prefetch as it allows specifying
+ the locality.
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ {
+ operands[2] = GEN_INT (3);
+ }
+ else
+ {
+ operands[1] = const0_rtx;
+ }
+})
+
+(define_insn "*prefetch_sse"
+ [(prefetch (match_operand:SI 0 "address_operand" "")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")])
+
+(define_insn "*prefetch_3dnow"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 0))]
+ "TARGET_3DNOW"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")])
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index c515064..9f9f2f9 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -34,11 +34,11 @@
#include <mmintrin.h>
/* The data type indended for user use. */
-typedef int __m128 __attribute__ ((mode (TI)));
+typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
/* Internal data types for implementing the instrinsics. */
-typedef int __v4sf __attribute__ ((mode (V4SF)));
-typedef int __v4si __attribute__ ((mode (V4SI)));
+typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
+typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
/* Create a selector for use with the SHUFPS instruction. */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
@@ -680,7 +680,7 @@ _mm_movemask_ps (__m128 __A)
static __inline unsigned int
_mm_getcsr (void)
{
- return __builtin_ia32_getmxcsr ();
+ return __builtin_ia32_stmxcsr ();
}
/* Read exception bits from the control register. */
@@ -712,7 +712,7 @@ _MM_GET_FLUSH_ZERO_MODE (void)
static __inline void
_mm_setcsr (unsigned int __I)
{
- __builtin_ia32_setmxcsr (__I);
+ __builtin_ia32_ldmxcsr (__I);
}
/* Set exception bits in the control register. */