diff options
author | Jan Hubicka <jh@suse.cz> | 2002-10-15 10:24:36 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2002-10-15 08:24:36 +0000 |
commit | 1877be457e46e1f22414e7c215805e9e51c93cea (patch) | |
tree | 2926a21bf98ecf1245514d8ae40ece750b02bc54 /gcc/config | |
parent | 0aab899b147fdb4d232e55bb9307777d07f7ebe4 (diff) | |
download | gcc-1877be457e46e1f22414e7c215805e9e51c93cea.zip gcc-1877be457e46e1f22414e7c215805e9e51c93cea.tar.gz gcc-1877be457e46e1f22414e7c215805e9e51c93cea.tar.bz2 |
re PR c/7344 (performance regression on huge case statements)
* i386.md (movv2di_internal): New pattern.
(movv2df_internal, movv8hi_internal, movv16qi_internal): Fix predicate.
(movv2di): New expander.
* i386.c (ix86_preferred_reload_class): Return NO_REGS for vector operands.
* i386.c (ix86_expand_timode_binop_builtin): Delete.
(builtin_description): Add SSE1 logicals; rename SSE2 logicals.
(ix86_init_mmx_sse_builtins): Kill SSE1 logicals.
(ix86_expand_builtin): Likewise.
* i386.h (sse_andti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2,
sse_andti3,
sse_andnti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2,
sse_andnti3,
sse_orti4_df_1, sse_orti3_df_2, sse_orti3_sf_1, sse_orti3_sf_2,
sse_orti3,
sse_xorti4_df_1, sse_xorti3_df_2, sse_xorti3_sf_1, sse_xorti3_sf_2,
sse_xorti3): Kill.
(sse_andv4sf3, sse_andnv4sf3, sse_orv2df3, sse_xorv2df3, sse_andv2df3,
sse_andnv2df3, sse_orv2df3, sse_xorv2df3): New expanders.
(*sse_andv4sf3, *sse_andnv2df3, *sse_orv4sf3, *sse_xorv4sf3, *sse_andv2df3,
*sse_andnv2df3, *sse_orv2df3, *sse_xorv2df3): New patterns.
(*sse_andsf3, *sse_andndf3, *sse_ordf3, *sse_xordf3, *sse_anddf3,
*sse_andndf3, *sse_orv2df3, *sse_xorv2df3): New patterns.
* xmmintrin.h (__m128i): Define as __v2di.
PR c/7344
* predict.c (can_predict_insn_p): New function.
(estimate_probability): Avoid unnecessary work.
(process_note_prediction): Likewise.
* toplev.c (rest_of_compilation): Account early branch prediction pass
as TV_BRANCH_PROB.
PR c++/6419
(expand_expr): Use DECL_RTL_SET_P.
From-SVN: r58156
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.c | 74 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 422 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 2 |
3 files changed, 252 insertions, 246 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3ef4848..82b22dc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -771,8 +771,6 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); -static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, - tree, rtx)); static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); @@ -11811,6 +11809,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, @@ -11935,10 +11938,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 
0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, @@ -12443,11 +12446,6 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); - def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); - def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); - def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); @@ -12680,45 +12678,6 @@ ix86_expand_binop_builtin (icode, arglist, target) return target; } -/* In type_for_mode we restrict the ability to create TImode types - to hosts with 64-bit H_W_I. So we've defined the SSE logicals - to have a V4SFmode signature. Convert them in-place to TImode. 
*/ - -static rtx -ix86_expand_timode_binop_builtin (icode, arglist, target) - enum insn_code icode; - tree arglist; - rtx target; -{ - rtx pat; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); - - op0 = gen_lowpart (TImode, op0); - op1 = gen_lowpart (TImode, op1); - target = gen_reg_rtx (TImode); - - if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) - op0 = copy_to_mode_reg (TImode, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) - op1 = copy_to_mode_reg (TImode, op1); - - /* In the commutative cases, both op0 and op1 are nonimmediate_operand, - yet one of the two must not be a memory. This is normally enforced - by expanders, but we didn't bother to create one here. */ - if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) - op0 = copy_to_mode_reg (TImode, op0); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - - return gen_lowpart (V4SFmode, target); -} - /* Subroutine of ix86_expand_builtin to take care of stores. 
*/ static rtx @@ -13064,19 +13023,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_RCPSS: return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); - case IX86_BUILTIN_ANDPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, - arglist, target); - case IX86_BUILTIN_ANDNPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, - arglist, target); - case IX86_BUILTIN_ORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, - arglist, target); - case IX86_BUILTIN_XORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, - arglist, target); - case IX86_BUILTIN_LOADAPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); @@ -13553,6 +13499,8 @@ ix86_preferred_reload_class (x, class) rtx x; enum reg_class class; { + if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x))) + return NO_REGS; if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) { /* SSE can't load any constant directly yet. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dd174fe..efa84c9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17823,6 +17823,15 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_insn "movv2di_internal" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") + (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "movdga\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] @@ -17869,7 +17878,7 @@ (define_insn "movv2df_internal" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DF 1 "general_operand" "xm,x"))] + (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. 
"movapd\t{%1, %0|%0, %1}" @@ -17878,7 +17887,7 @@ (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") - (match_operand:V8HI 1 "general_operand" "xm,x"))] + (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17887,7 +17896,7 @@ (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (match_operand:V16QI 1 "general_operand" "xm,x"))] + (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17933,12 +17942,21 @@ (define_expand "movv4si" [(set (match_operand:V4SI 0 "general_operand" "") (match_operand:V4SI 1 "general_operand" ""))] - "TARGET_MMX" + "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); DONE; }) +(define_expand "movv2di" + [(set (match_operand:V2DI 0 "general_operand" "") + (match_operand:V2DI 1 "general_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DImode, operands); + DONE; +}) + (define_expand "movv2si" [(set (match_operand:V2SI 0 "general_operand" "") (match_operand:V2SI 1 "general_operand" ""))] @@ -18455,236 +18473,313 @@ ;; SSE logical operations. +;; SSE defines logical operations on floating point values. This brings +;; interesting challenge to RTL representation where logicals are only valid +;; on integral types. We deal with this by representing the floating point +;; logical as logical on arguments casted to TImode as this is what hardware +;; really does. Unfortunately hardware requires the type information to be +;; present and thus we must avoid subregs from being simplified and elliminated +;; in later compilation phases. 
+;; +;; We have following variants from each instruction: +;; sse_andsf3 - the operation taking V4SF vector operands +;; and doing TImode cast on them +;; *sse_andsf3_memory - the operation taking one memory operand casted to +;; TImode, since backend insist on elliminating casts +;; on memory operands +;; sse_andti3_sf_1 - the operation taking SF scalar operands. +;; We can not accept memory operand here as instruction reads +;; whole scalar. This is generated only post reload by GCC +;; scalar float operations that expands to logicals (fabs) +;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode +;; memory operand. Eventually combine can be able +;; to synthetize these using splitter. +;; sse2_anddf3, *sse2_anddf3_memory +;; +;; ;; These are not called andti3 etc. because we really really don't want ;; the compiler to widen DImode ands to TImode ands and then try to move ;; into DImode subregs of SSE registers, and them together, and move out ;; of DImode subregs again! 
+;; SSE1 single precision floating point logical operation +(define_expand "sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") -(define_insn "*sse_andti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" +(define_insn "*sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" +(define_insn "*sse_andsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] +(define_expand "sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 
0) + (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_2" +(define_insn "*sse_nandsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" + "andnps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_expand "sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" + "orps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse2_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_insn "*sse_iorsf3" + 
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" + "orps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V4SF")]) -(define_insn "sse2_andv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 +(define_expand "sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "*sse_nandti3_df" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "") -(define_insn "*sse_nandti3_sf" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)) +(define_insn "*sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse_nandti3" - 
[(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) +(define_insn "*sse_xorsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2" - "andnps\t{%2, %0|%0, %2}" + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse2_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] +;; SSE2 double precision floating point logical operation + +(define_expand "sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "pandn\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse2_nandv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE 
(operands[2]) != MEM)" - "pandn\t{%2, %0|%0, %2}" + "andpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] +(define_expand "sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_2" +(define_insn "*sse_nandti3_df" [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "orpd\t{%2, %0|%0, 
%2}" [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" +(define_insn "*sse2_iordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "orpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_expand "sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_insn "*sse2_xordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 
+ "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" + "xorpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse2_iorti3" +;; SSE2 integral logicals. These patterns must always come after floating +;; point ones since we don't want compiler to use integer opcodes on floating +;; point SSE values to avoid matching of subregs in the match_operand. +(define_insn "*sse2_andti3" [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" + "pand\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "TI")]) -(define_insn "sse2_iorv2di3" +(define_insn "sse2_andv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" + "pand\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "*sse_xorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] +(define_insn "*sse2_nandti3" + [(set 
(match_operand:TI 0 "register_operand" "=x") + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" + "pandn\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" +(define_insn "sse2_nandv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pandn\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_insn "*sse2_iorti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "por\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "sse_xorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 +(define_insn "sse2_iorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 
"nonimmediate_operand" "xm")))] + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" + "por\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "sse2_xorti3" +(define_insn "*sse2_xorti3" [(set (match_operand:TI 0 "register_operand" "=x") (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] @@ -18889,7 +18984,6 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) - ;; SSE <-> integer/MMX conversions (define_insn "cvtpi2ps" @@ -20264,42 +20358,6 @@ "minsd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "DF")]) - -(define_insn "sse2_anddf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_nanddf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "0") 0)) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_iordf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_xordf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - 
"TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) ;; SSE2 square root. There doesn't appear to be an extension for the ;; reciprocal/rsqrt instructions if the Intel manual is to be believed. diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 9442e96..fcf73ca 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1066,7 +1066,7 @@ typedef int __v4si __attribute__ ((mode (V4SI))); typedef int __v8hi __attribute__ ((mode (V8HI))); typedef int __v16qi __attribute__ ((mode (V16QI))); -#define __m128i __m128 +#define __m128i __v2di #define __m128d __v2df static __inline __m128d |