diff options
-rw-r--r-- | gcc/ChangeLog | 22 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 281 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 16 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 562 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 10 |
6 files changed, 335 insertions, 558 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d5e4eb9..93ceae3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2002-01-12 Richard Henderson <rth@redhat.com> + + * config/i386/i386.c (override_options): If SSE, enable sse prefetch. + (ix86_expand_vector_move): New. + (bdesc_2arg): Remove andps, andnps, orps, xorps. + (ix86_init_mmx_sse_builtins): Make static. Remove composite builtins. + Remove old prefetch builtins. Special case the logicals removed above. + (ix86_expand_builtin): Likewise. + (safe_vector_operand): Use V4SFmode, not TImode. + (ix86_expand_store_builtin): Remove shuffle arg. Update callers. + (ix86_expand_timode_binop_builtin): New. + * config/i386/i386-protos.h: Update. + * config/i386/i386.h (enum ix86_builtins): Update. + * config/i386/i386.md: Correct predicates on MMX/SSE patterns. + Use ix86_expand_vector_move in vector move expanders. + (movti_internal, movti_rex64): Add xorps alternative. + (sse_clrv4sf): Rename and adjust from sse_clrti. + (prefetch): Don't work so hard. + (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC. + * config/i386/xmmintrin.h (__m128): Use V4SFmode. + (_mm_getcsr, _mm_setcsr): Fix typo in builtin name. + 2002-01-11 Richard Henderson <rth@redhat.com> * config/i386/mmintrin.h: New file. 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 42a8f4a..01c4d44 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -108,6 +108,7 @@ extern rtx i386_simplify_dwarf_addr PARAMS ((rtx)); extern void ix86_expand_clear PARAMS ((rtx)); extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[])); +extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[])); extern void ix86_expand_binary_operator PARAMS ((enum rtx_code, enum machine_mode, rtx[])); extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode, @@ -177,7 +178,6 @@ extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int)); extern rtx ix86_function_value PARAMS ((tree)); extern void ix86_init_builtins PARAMS ((void)); -extern void ix86_init_mmx_sse_builtins PARAMS ((void)); extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int)); #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8eebf5f..08c9ca6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -684,6 +684,7 @@ static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static void ix86_sched_init PARAMS ((FILE *, int, int)); static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int)); static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int)); +static void ix86_init_mmx_sse_builtins PARAMS ((void)); struct ix86_address { @@ -701,7 +702,9 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); -static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int)); +static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, + tree, rtx)); +static rtx ix86_expand_store_builtin PARAMS 
((enum insn_code, tree)); static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code, @@ -1164,7 +1167,10 @@ override_options () /* It makes no sense to ask for just SSE builtins, so MMX is also turned on by -msse. */ if (TARGET_SSE) - target_flags |= MASK_MMX; + { + target_flags |= MASK_MMX; + x86_prefetch_sse = true; + } /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */ if (TARGET_3DNOW) @@ -6661,6 +6667,38 @@ ix86_expand_move (mode, operands) emit_insn (insn); } +void +ix86_expand_vector_move (mode, operands) + enum machine_mode mode; + rtx operands[]; +{ + /* Force constants other than zero into memory. We do not know how + the instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], mode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (mode, addr); + } + + /* Make operand1 a register, in MODE, if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], mode) + && !register_operand (operands[1], mode) + && operands[1] != CONST0_RTX (mode)) + { + rtx temp = force_reg (mode, operands[1]); + emit_move_insn (operands[0], temp); + return; + } + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); +} + /* Attempt to expand a binary operator. Make the expansion closer to the actual machine, then just general_operand, which will allow 3 separate memory references (one output, two input) in a single insn. 
*/ @@ -10748,11 +10786,6 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, - { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, @@ -10865,7 +10898,7 @@ ix86_init_builtins () /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX builtins. 
*/ -void +static void ix86_init_mmx_sse_builtins () { const struct builtin_description * d; @@ -10899,14 +10932,6 @@ ix86_init_mmx_sse_builtins () = build_function_type (integer_type_node, tree_cons (NULL_TREE, V8QI_type_node, endlink)); - tree int_ftype_v2si - = build_function_type (integer_type_node, - tree_cons (NULL_TREE, V2SI_type_node, - endlink)); - tree v2si_ftype_int - = build_function_type (V2SI_type_node, - tree_cons (NULL_TREE, integer_type_node, - endlink)); tree v4sf_ftype_v4sf_int = build_function_type (V4SF_type_node, tree_cons (NULL_TREE, V4SF_type_node, @@ -10976,11 +11001,6 @@ ix86_init_mmx_sse_builtins () endlink))); tree void_ftype_void = build_function_type (void_type_node, endlink); - tree void_ftype_pchar_int - = build_function_type (void_type_node, - tree_cons (NULL_TREE, pchar_type_node, - tree_cons (NULL_TREE, integer_type_node, - endlink))); tree void_ftype_unsigned = build_function_type (void_type_node, tree_cons (NULL_TREE, unsigned_type_node, @@ -10989,8 +11009,8 @@ ix86_init_mmx_sse_builtins () = build_function_type (unsigned_type_node, endlink); tree di_ftype_void = build_function_type (long_long_unsigned_type_node, endlink); - tree ti_ftype_void - = build_function_type (intTI_type_node, endlink); + tree v4sf_ftype_void + = build_function_type (V4SF_type_node, endlink); tree v2si_ftype_v4sf = build_function_type (V2SI_type_node, tree_cons (NULL_TREE, V4SF_type_node, @@ -11007,19 +11027,6 @@ ix86_init_mmx_sse_builtins () = build_function_type (V4SF_type_node, tree_cons (NULL_TREE, pfloat_type_node, endlink)); - tree v4sf_ftype_float - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, float_type_node, - endlink)); - tree v4sf_ftype_float_float_float_float - = build_function_type (V4SF_type_node, - tree_cons (NULL_TREE, float_type_node, - tree_cons (NULL_TREE, float_type_node, - tree_cons (NULL_TREE, - float_type_node, - tree_cons (NULL_TREE, - float_type_node, - endlink))))); /* @@@ the type is bogus */ tree 
v4sf_ftype_v4sf_pv2si = build_function_type (V4SF_type_node, @@ -11069,11 +11076,6 @@ ix86_init_mmx_sse_builtins () tree_cons (NULL_TREE, V2SI_type_node, tree_cons (NULL_TREE, V2SI_type_node, endlink))); - tree ti_ftype_ti_ti - = build_function_type (intTI_type_node, - tree_cons (NULL_TREE, intTI_type_node, - tree_cons (NULL_TREE, intTI_type_node, - endlink))); tree di_ftype_di_di = build_function_type (long_long_unsigned_type_node, tree_cons (NULL_TREE, long_long_unsigned_type_node, @@ -11110,11 +11112,6 @@ ix86_init_mmx_sse_builtins () V2SF_type_node, endlink))); - tree void_ftype_pchar - = build_function_type (void_type_node, - tree_cons (NULL_TREE, pchar_type_node, - endlink)); - /* Add all builtins that are more or less simple operations on two operands. */ for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++) @@ -11142,9 +11139,6 @@ ix86_init_mmx_sse_builtins () case V2SImode: type = v2si_ftype_v2si_v2si; break; - case TImode: - type = ti_ftype_ti_ti; - break; case DImode: type = di_ftype_di_di; break; @@ -11164,8 +11158,6 @@ ix86_init_mmx_sse_builtins () } /* Add the remaining MMX insns with somewhat more complicated types. 
*/ - def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT); - def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT); def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO); def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS); def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); @@ -11199,6 +11191,11 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); + def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); + def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); + def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); @@ -11222,7 +11219,6 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ); def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE); - def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH); def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW); @@ -11256,8 +11252,6 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR); def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD); 
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW); - def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW); - def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW); /* 3DNow! extension as used in the Athlon CPU. */ def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW); @@ -11267,14 +11261,7 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF); def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI); - /* Composite intrinsics. */ - def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1); - def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS); - def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS); - def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1); - def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS); - def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1); - def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS); + def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -11293,8 +11280,8 @@ safe_vector_operand (x, mode) emit_insn (gen_mmx_clrdi (mode == DImode ? x : gen_rtx_SUBREG (DImode, x, 0))); else - emit_insn (gen_sse_clrti (mode == TImode ? x - : gen_rtx_SUBREG (TImode, x, 0))); + emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? 
x + : gen_rtx_SUBREG (V4SFmode, x, 0))); return x; } @@ -11342,13 +11329,45 @@ ix86_expand_binop_builtin (icode, arglist, target) return target; } +/* In type_for_mode we restrict the ability to create TImode types + to hosts with 64-bit H_W_I. So we've defined the SSE logicals + to have a V4SFmode signature. Convert them in-place to TImode. */ + +static rtx +ix86_expand_timode_binop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + + op0 = gen_lowpart (TImode, op0); + op1 = gen_lowpart (TImode, op1); + target = gen_reg_rtx (TImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) + op0 = copy_to_mode_reg (TImode, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) + op1 = copy_to_mode_reg (TImode, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + + return gen_lowpart (V4SFmode, target); +} + /* Subroutine of ix86_expand_builtin to take care of stores. */ static rtx -ix86_expand_store_builtin (icode, arglist, shuffle) +ix86_expand_store_builtin (icode, arglist) enum insn_code icode; tree arglist; - int shuffle; { rtx pat; tree arg0 = TREE_VALUE (arglist); @@ -11362,10 +11381,6 @@ ix86_expand_store_builtin (icode, arglist, shuffle) op1 = safe_vector_operand (op1, mode1); op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - if (shuffle >= 0 || ! 
(*insn_data[icode].operand[1].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (shuffle >= 0) - emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle))); pat = GEN_FCN (icode) (op0, op1); if (pat) emit_insn (pat); @@ -11568,7 +11583,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) enum insn_code icode; tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); tree arglist = TREE_OPERAND (exp, 1); - tree arg0, arg1, arg2, arg3; + tree arg0, arg1, arg2; rtx op0, op1, op2, pat; enum machine_mode tmode, mode0, mode1, mode2; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); @@ -11583,19 +11598,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (gen_sfence ()); return 0; - case IX86_BUILTIN_M_FROM_INT: - target = gen_reg_rtx (DImode); - op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); - emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0); - return target; - - case IX86_BUILTIN_M_TO_INT: - op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); - op0 = copy_to_mode_reg (DImode, op0); - target = gen_reg_rtx (SImode); - emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0)); - return target; - case IX86_BUILTIN_PEXTRW: icode = CODE_FOR_mmx_pextrw; arg0 = TREE_VALUE (arglist); @@ -11689,6 +11691,19 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_RCPSS: return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); + case IX86_BUILTIN_ANDPS: + return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, + arglist, target); + case IX86_BUILTIN_ANDNPS: + return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, + arglist, target); + case IX86_BUILTIN_ORPS: + return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, + arglist, target); + case IX86_BUILTIN_XORPS: + return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, + arglist, target); + case IX86_BUILTIN_LOADAPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, 
target, 1); @@ -11696,15 +11711,15 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1); case IX86_BUILTIN_STOREAPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1); + return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist); case IX86_BUILTIN_STOREUPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1); + return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist); case IX86_BUILTIN_LOADSS: return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1); case IX86_BUILTIN_STORESS: - return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1); + return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist); case IX86_BUILTIN_LOADHPS: case IX86_BUILTIN_LOADLPS: @@ -11753,9 +11768,9 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) return 0; case IX86_BUILTIN_MOVNTPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1); + return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); case IX86_BUILTIN_MOVNTQ: - return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1); + return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist); case IX86_BUILTIN_LDMXCSR: op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0); @@ -11769,29 +11784,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) emit_insn (gen_stmxcsr (target)); return copy_to_mode_reg (SImode, target); - case IX86_BUILTIN_PREFETCH: - icode = CODE_FOR_prefetch_sse; - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - - if (! 
(*insn_data[icode].operand[1].predicate) (op1, mode1)) - { - /* @@@ better error message */ - error ("selector must be an immediate"); - return const0_rtx; - } - - op0 = copy_to_mode_reg (Pmode, op0); - pat = GEN_FCN (icode) (op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; - case IX86_BUILTIN_SHUFPS: icode = CODE_FOR_sse_shufps; arg0 = TREE_VALUE (arglist); @@ -11914,19 +11906,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_PMULHRW: return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target); - case IX86_BUILTIN_PREFETCH_3DNOW: - case IX86_BUILTIN_PREFETCHW: - icode = CODE_FOR_prefetch_3dnow; - arg0 = TREE_VALUE (arglist); - op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx); - mode0 = insn_data[icode].operand[0].mode; - pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1); - if (! pat) - return NULL_RTX; - emit_insn (pat); - return NULL_RTX; - case IX86_BUILTIN_PF2IW: return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0); @@ -11945,57 +11924,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_PSWAPDSF: return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0); - /* Composite intrinsics. 
*/ - case IX86_BUILTIN_SETPS1: - target = assign_386_stack_local (SFmode, 0); - arg0 = TREE_VALUE (arglist); - emit_move_insn (adjust_address (target, SFmode, 0), - expand_expr (arg0, NULL_RTX, VOIDmode, 0)); - op0 = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0))); - emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0))); - return op0; - - case IX86_BUILTIN_SETPS: - target = assign_386_stack_local (V4SFmode, 0); - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); - arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); - emit_move_insn (adjust_address (target, SFmode, 0), - expand_expr (arg0, NULL_RTX, VOIDmode, 0)); - emit_move_insn (adjust_address (target, SFmode, 4), - expand_expr (arg1, NULL_RTX, VOIDmode, 0)); - emit_move_insn (adjust_address (target, SFmode, 8), - expand_expr (arg2, NULL_RTX, VOIDmode, 0)); - emit_move_insn (adjust_address (target, SFmode, 12), - expand_expr (arg3, NULL_RTX, VOIDmode, 0)); - op0 = gen_reg_rtx (V4SFmode); - emit_insn (gen_sse_movaps (op0, target)); - return op0; - - case IX86_BUILTIN_CLRPS: - target = gen_reg_rtx (TImode); - emit_insn (gen_sse_clrti (target)); - return target; - - case IX86_BUILTIN_LOADRPS: - target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, - gen_reg_rtx (V4SFmode), 1); - emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b))); + case IX86_BUILTIN_SSE_ZERO: + target = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_clrv4sf (target)); return target; - case IX86_BUILTIN_LOADPS1: - target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, - gen_reg_rtx (V4SFmode), 1); - emit_insn (gen_sse_shufps (target, target, target, const0_rtx)); - return target; - - case IX86_BUILTIN_STOREPS1: - return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0); - case IX86_BUILTIN_STORERPS: - return ix86_expand_store_builtin (CODE_FOR_sse_movaps, 
arglist, 0x1B); - case IX86_BUILTIN_MMX_ZERO: target = gen_reg_rtx (DImode); emit_insn (gen_mmx_clrdi (target)); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f7f569d..8f8e208 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2089,8 +2089,6 @@ enum ix86_builtins IX86_BUILTIN_CVTSS2SI, IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, - IX86_BUILTIN_M_FROM_INT, - IX86_BUILTIN_M_TO_INT, IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, @@ -2215,7 +2213,6 @@ enum ix86_builtins IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR, IX86_BUILTIN_SFENCE, - IX86_BUILTIN_PREFETCH, /* 3DNow! Original */ IX86_BUILTIN_FEMMS, @@ -2238,8 +2235,6 @@ enum ix86_builtins IX86_BUILTIN_PFSUBR, IX86_BUILTIN_PI2FD, IX86_BUILTIN_PMULHRW, - IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */ - IX86_BUILTIN_PREFETCHW, /* 3DNow! Athlon Extensions */ IX86_BUILTIN_PF2IW, @@ -2249,16 +2244,7 @@ enum ix86_builtins IX86_BUILTIN_PSWAPDSI, IX86_BUILTIN_PSWAPDSF, - /* Composite builtins, expand to more than one insn. */ - IX86_BUILTIN_SETPS1, - IX86_BUILTIN_SETPS, - IX86_BUILTIN_CLRPS, - IX86_BUILTIN_SETRPS, - IX86_BUILTIN_LOADPS1, - IX86_BUILTIN_LOADRPS, - IX86_BUILTIN_STOREPS1, - IX86_BUILTIN_STORERPS, - + IX86_BUILTIN_SSE_ZERO, IX86_BUILTIN_MMX_ZERO, IX86_BUILTIN_MAX diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4673c2b..c892fc1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -81,7 +81,6 @@ ;; 32 This is a `maskmov' operation. ;; 33 This is a `movmsk' operation. ;; 34 This is a `non-temporal' move. -;; 35 This is a `prefetch' (SSE) operation. ;; 36 This is used to distinguish COMISS from UCOMISS. ;; 37 This is a `ldmxcsr' operation. 
;; 38 This is a forced `movaps' instruction (rather than whatever movti does) @@ -17686,7 +17685,7 @@ (define_insn "movv4sf_internal" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "general_operand" "xm,x"))] + (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17694,7 +17693,7 @@ (define_insn "movv4si_internal" [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "general_operand" "xm,x"))] + (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17702,28 +17701,28 @@ (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "general_operand" "ym,y"))] + (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] "TARGET_MMX" "movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmx")]) (define_insn "movv4hi_internal" [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "general_operand" "ym,y"))] + (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] "TARGET_MMX" "movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmx")]) (define_insn "movv2si_internal" [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "general_operand" "ym,y"))] + (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] "TARGET_MMX" "movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmx")]) (define_insn "movv2sf_internal" [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "general_operand" "ym,y"))] + (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] "TARGET_3DNOW" "movq\\t{%1, %0|%0, %1}" [(set_attr "type" "mmx")]) @@ -17734,34 +17733,10 @@ "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) - { - ix86_expand_move (TImode, operands); - DONE; - } - /* For constants other than zero into memory. 
We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], TImode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (TImode, addr); - } - - /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], TImode) - && !register_operand (operands[1], TImode) - && operands[1] != CONST0_RTX (TImode)) - { - rtx temp = force_reg (TImode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_move (TImode, operands); + else + ix86_expand_vector_move (TImode, operands); + DONE; }) (define_expand "movv4sf" @@ -17769,30 +17744,8 @@ (match_operand:V4SF 1 "general_operand" ""))] "TARGET_SSE" { - /* For constants other than zero into memory. We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V4SFmode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V4SFmode, addr); - } - - /* Make operand1 a register if it isn't already. 
*/ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V4SFmode) - && !register_operand (operands[1], V4SFmode) - && operands[1] != CONST0_RTX (V4SFmode)) - { - rtx temp = force_reg (V4SFmode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_vector_move (V4SFmode, operands); + DONE; }) (define_expand "movv4si" @@ -17800,30 +17753,8 @@ (match_operand:V4SI 1 "general_operand" ""))] "TARGET_MMX" { - /* For constants other than zero into memory. We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V4SImode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V4SImode, addr); - } - - /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V4SImode) - && !register_operand (operands[1], V4SImode) - && operands[1] != CONST0_RTX (V4SImode)) - { - rtx temp = force_reg (V4SImode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_vector_move (V4SImode, operands); + DONE; }) (define_expand "movv2si" @@ -17831,30 +17762,8 @@ (match_operand:V2SI 1 "general_operand" ""))] "TARGET_MMX" { - /* For constants other than zero into memory. We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. 
*/ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V2SImode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V2SImode, addr); - } - - /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V2SImode) - && !register_operand (operands[1], V2SImode) - && operands[1] != CONST0_RTX (V2SImode)) - { - rtx temp = force_reg (V2SImode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_vector_move (V2SImode, operands); + DONE; }) (define_expand "movv4hi" @@ -17862,30 +17771,8 @@ (match_operand:V4HI 1 "general_operand" ""))] "TARGET_MMX" { - /* For constants other than zero into memory. We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V4HImode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V4HImode, addr); - } - - /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V4HImode) - && !register_operand (operands[1], V4HImode) - && operands[1] != CONST0_RTX (V4HImode)) - { - rtx temp = force_reg (V4HImode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_vector_move (V4HImode, operands); + DONE; }) (define_expand "movv8qi" @@ -17893,65 +17780,18 @@ (match_operand:V8QI 1 "general_operand" ""))] "TARGET_MMX" { - /* For constants other than zero into memory. 
We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V8QImode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V8QImode, addr); - } - - /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V8QImode) - && !register_operand (operands[1], V8QImode) - && operands[1] != CONST0_RTX (V8QImode)) - { - rtx temp = force_reg (V8QImode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } + ix86_expand_vector_move (V8QImode, operands); + DONE; }) (define_expand "movv2sf" [(set (match_operand:V2SF 0 "general_operand" "") (match_operand:V2SF 1 "general_operand" ""))] "TARGET_3DNOW" - " { - /* For constants other than zero into memory. We do not know how the - instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if ((reload_in_progress | reload_completed) == 0 - && register_operand (operands[0], V2SFmode) - && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - - emit_move_insn (addr, - XEXP (force_const_mem (V2SFmode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (V2SFmode, addr); - } - - /* Make operand1 a register is it isn't already. 
*/ - if ((reload_in_progress | reload_completed) == 0 - && !register_operand (operands[0], V2SFmode) - && !register_operand (operands[1], V2SFmode) - && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0) - && operands[1] != CONST0_RTX (V2SFmode)) - { - rtx temp = force_reg (V2SFmode, operands[1]); - emit_move_insn (operands[0], temp); - DONE; - } -}") + ix86_expand_vector_move (V2SFmode, operands); + DONE; +}) (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") @@ -18031,25 +17871,27 @@ [(set_attr "type" "mmx")]) (define_insn "movti_internal" - [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m") - (match_operand:TI 1 "general_operand" "xm,x"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "general_operand" "O,xm,x"))] "TARGET_SSE && !TARGET_64BIT" "@ + xorps\t%0, %0 movaps\t{%1, %0|%0, %1} movaps\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,x,m"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") + (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ # # + xorps\t%0, %0 movaps\\t{%1, %0|%0, %1} movaps\\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,sse,sse") + [(set_attr "type" "*,*,sse,sse,sse") (set_attr "mode" "TI")]) (define_split @@ -18064,7 +17906,8 @@ ;; movaps or movups (define_insn "sse_movaps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))] + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))] "TARGET_SSE" "@ movaps\t{%1, %0|%0, %1} @@ -18073,7 +17916,8 @@ (define_insn "sse_movups" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))] + (unspec:V4SF 
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))] "TARGET_SSE" "@ movups\t{%1, %0|%0, %1} @@ -18154,7 +17998,8 @@ (match_operand:V4SF 1 "nonimmediate_operand" "0,0") (match_operand:V4SF 2 "nonimmediate_operand" "m,x") (const_int 12)))] - "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "TARGET_SSE + && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movhps\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18164,7 +18009,8 @@ (match_operand:V4SF 1 "nonimmediate_operand" "0,0") (match_operand:V4SF 2 "nonimmediate_operand" "m,x") (const_int 3)))] - "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "TARGET_SSE + && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movlps\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18220,10 +18066,11 @@ (define_insn "vmaddv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "addss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18231,17 +18078,18 @@ (define_insn "subv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "subps\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) (define_insn "vmsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") + 
(match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "subss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18256,10 +18104,11 @@ (define_insn "vmmulv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "mulss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18274,10 +18123,11 @@ (define_insn "vmdivv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (div:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "divss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18287,53 +18137,57 @@ (define_insn "rcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))] + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))] "TARGET_SSE" "rcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "vmrcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] "TARGET_SSE" "rcpss\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "rsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - 
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))] + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))] "TARGET_SSE" "rsqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] + (vec_merge:V4SF + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] "TARGET_SSE" "rsqrtss\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "sqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))] + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE" "sqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "vmsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")) - (match_operand:V4SF 2 "register_operand" "0") - (const_int 1)))] + (vec_merge:V4SF + (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] "TARGET_SSE" "sqrtss\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) - ;; SSE logical operations. ;; These are not called andti3 etc. because we really really don't want @@ -18519,9 +18373,9 @@ ;; Use xor, but don't show input operands so they aren't live before ;; this insn. 
-(define_insn "sse_clrti" - [(set (match_operand:TI 0 "register_operand" "=x") - (unspec:TI [(const_int 0)] 45))] +(define_insn "sse_clrv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(const_int 0)] 45))] "TARGET_SSE" "xorps\t{%0, %0|%0, %0}" [(set_attr "type" "sse") @@ -18532,8 +18386,8 @@ (define_insn "maskcmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "x")]))] + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")]))] "TARGET_SSE" "cmp%D3ps\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18542,24 +18396,23 @@ [(set (match_operand:V4SI 0 "register_operand" "=x") (not:V4SI (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "x")])))] + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")])))] "TARGET_SSE" - "* { if (GET_CODE (operands[3]) == UNORDERED) - return \"cmpordps\t{%2, %0|%0, %2}\"; - - return \"cmpn%D3ps\t{%2, %0|%0, %2}\"; -}" + return "cmpordps\t{%2, %0|%0, %2}"; + else + return "cmpn%D3ps\t{%2, %0|%0, %2}"; +} [(set_attr "type" "sse")]) (define_insn "vmmaskcmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_merge:V4SI (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "x")]) + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")]) (match_dup 1) (const_int 1)))] "TARGET_SSE" @@ -18571,18 +18424,17 @@ (vec_merge:V4SI (not:V4SI (match_operator:V4SI 3 "sse_comparison_operator" - [(match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "x")])) + [(match_operand:V4SF 1 "register_operand" "0") + 
(match_operand:V4SF 2 "register_operand" "x")])) (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" - "* { if (GET_CODE (operands[3]) == UNORDERED) - return \"cmpordss\t{%2, %0|%0, %2}\"; - - return \"cmpn%D3ss\t{%2, %0|%0, %2}\"; -}" + return "cmpordss\t{%2, %0|%0, %2}"; + else + return "cmpn%D3ss\t{%2, %0|%0, %2}"; +} [(set_attr "type" "sse")]) (define_insn "sse_comi" @@ -18663,10 +18515,11 @@ (define_insn "vmsmaxv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "maxss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18681,10 +18534,11 @@ (define_insn "vmsminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") - (match_operand:V4SF 2 "nonimmediate_operand" "xm")) - (match_dup 1) - (const_int 1)))] + (vec_merge:V4SF + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] "TARGET_SSE" "minss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) @@ -18694,56 +18548,58 @@ (define_insn "cvtpi2ps" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 "register_operand" "ym"))) - (const_int 12)))] + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) + (const_int 12)))] "TARGET_SSE" "cvtpi2ps\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) (define_insn "cvtps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI 
(fix:V4SI (match_operand:V4SF 1 "register_operand" "xm")) - (parallel - [(const_int 0) - (const_int 1)])))] + (vec_select:V2SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvtps2pi\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "cvttps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30) - (parallel - [(const_int 0) - (const_int 1)])))] + (vec_select:V2SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvttps2pi\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "cvtsi2ss" [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") - (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "register_operand" "rm"))) - (const_int 14)))] + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 14)))] "TARGET_SSE" "cvtsi2ss\t{%2, %0|%0, %2}" [(set_attr "type" "sse")]) (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") - (vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm")) - (parallel [(const_int 0)])))] + (vec_select:SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0)])))] "TARGET_SSE" "cvtss2si\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") - (vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30) - (parallel [(const_int 0)])))] + (vec_select:SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (parallel [(const_int 0)])))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) @@ -18877,8 +18733,10 @@ [(set (match_operand:V4HI 
0 "register_operand" "=y") (truncate:V4HI (lshiftrt:V4SI - (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0")) - (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (mult:V4SI (sign_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) (const_int 16))))] "TARGET_MMX" "pmulhw\t{%2, %0|%0, %2}" @@ -18888,8 +18746,10 @@ [(set (match_operand:V4HI 0 "register_operand" "=y") (truncate:V4HI (lshiftrt:V4SI - (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0")) - (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (mult:V4SI (zero_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) (const_int 16))))] "TARGET_SSE || TARGET_3DNOW_A" "pmulhuw\t{%2, %0|%0, %2}" @@ -18899,12 +18759,12 @@ [(set (match_operand:V2SI 0 "register_operand" "=y") (plus:V2SI (mult:V2SI - (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) - (const_int 2)])))) + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2)])))) (mult:V2SI (sign_extend:V2SI (vec_select:V2HI (match_dup 1) (parallel [(const_int 1) @@ -19404,75 +19264,6 @@ [(set_attr "type" "sse") (set_attr "memory" "unknown")]) -(define_expand "prefetch" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (match_operand:SI 2 "const_int_operand" "n"))] - "TARGET_PREFETCH_SSE || TARGET_3DNOW" - " -{ - int rw = INTVAL (operands[1]); - int locality = INTVAL 
(operands[2]); - if (rw != 0 && rw != 1) - abort (); - if (locality < 0 || locality > 3) - abort (); - /* Use 3dNOW prefetch in case we are asking for write prefetch not - suported by SSE counterpart or the SSE prefetch is not available - (K6 machines). Otherwise use SSE prefetch as it allows specifying - of locality. */ - if (TARGET_3DNOW - && (!TARGET_PREFETCH_SSE || rw)) - { - emit_insn (gen_prefetch_3dnow (operands[0], operands[1])); - } - else - { - int i; - switch (locality) - { - case 0: /* No temporal locality. */ - i = 0; - break; - case 1: /* Lowest level of temporal locality. */ - i = 3; - break; - case 2: /* Moderate level of temporal locality. */ - i = 2; - break; - case 3: /* Highest level of temporal locality. */ - i = 1; - break; - default: - abort (); /* We already checked for valid values above. */ - break; - } - emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i))); - } - DONE; -}") - -(define_insn "prefetch_sse" - [(unspec [(match_operand:SI 0 "address_operand" "p") - (match_operand:SI 1 "immediate_operand" "n")] 35)] - "TARGET_PREFETCH_SSE" -{ - switch (INTVAL (operands[1])) - { - case 0: - return "prefetchnta\t%a0"; - case 1: - return "prefetcht0\t%a0"; - case 2: - return "prefetcht1\t%a0"; - case 3: - return "prefetcht2\t%a0"; - default: - abort (); - } -} - [(set_attr "type" "sse")]) - (define_expand "sse_prologue_save" [(parallel [(set (match_operand:BLK 0 "" "") (unspec:BLK [(reg:DI 21) @@ -19630,19 +19421,6 @@ "femms" [(set_attr "type" "mmx")]) -(define_insn "prefetch_3dnow" - [(prefetch (match_operand:SI 0 "address_operand" "p") - (match_operand:SI 1 "const_int_operand" "n") - (const_int 0))] - "TARGET_3DNOW" -{ - if (INTVAL (operands[1]) == 0) - return "prefetch\t%a0"; - else - return "prefetchw\t%a0"; -} - [(set_attr "type" "mmx")]) - (define_insn "pf2id" [(set (match_operand:V2SI 0 "register_operand" "=y") (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] @@ -19820,3 +19598,61 @@ "TARGET_3DNOW_A" "pswapd\\t{%1, %0|%0, 
%1}" [(set_attr "type" "mmx")]) + +(define_expand "prefetch" + [(prefetch (match_operand:SI 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" +{ + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + if (rw != 0 && rw != 1) + abort (); + if (locality < 0 || locality > 3) + abort (); + + /* Use 3dNOW prefetch in case we are asking for write prefetch not + suported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + of locality. */ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + { + operands[2] = GEN_INT (3); + } + else + { + operands[1] = const0_rtx; + } +}) + +(define_insn "*prefetch_sse" + [(prefetch (match_operand:SI 0 "address_operand" "") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + if (locality < 0 || locality > 3) + abort (); + + return patterns[locality]; +} + [(set_attr "type" "sse")]) + +(define_insn "*prefetch_3dnow" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 0))] + "TARGET_3DNOW" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx")]) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index c515064..9f9f2f9 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -34,11 +34,11 @@ #include <mmintrin.h> /* The data type indended for user use. */ -typedef int __m128 __attribute__ ((mode (TI))); +typedef int __m128 __attribute__ ((__mode__(__V4SF__))); /* Internal data types for implementing the instrinsics. 
*/ -typedef int __v4sf __attribute__ ((mode (V4SF))); -typedef int __v4si __attribute__ ((mode (V4SI))); +typedef int __v4sf __attribute__ ((__mode__(__V4SF__))); +typedef int __v4si __attribute__ ((__mode__(__V4SI__))); /* Create a selector for use with the SHUFPS instruction. */ #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ @@ -680,7 +680,7 @@ _mm_movemask_ps (__m128 __A) static __inline unsigned int _mm_getcsr (void) { - return __builtin_ia32_getmxcsr (); + return __builtin_ia32_stmxcsr (); } /* Read exception bits from the control register. */ @@ -712,7 +712,7 @@ _MM_GET_FLUSH_ZERO_MODE (void) static __inline void _mm_setcsr (unsigned int __I) { - __builtin_ia32_setmxcsr (__I); + __builtin_ia32_ldmxcsr (__I); } /* Set exception bits in the control register. */ |