aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2002-10-15 10:24:36 +0200
committerJan Hubicka <hubicka@gcc.gnu.org>2002-10-15 08:24:36 +0000
commit1877be457e46e1f22414e7c215805e9e51c93cea (patch)
tree2926a21bf98ecf1245514d8ae40ece750b02bc54 /gcc/config
parent0aab899b147fdb4d232e55bb9307777d07f7ebe4 (diff)
downloadgcc-1877be457e46e1f22414e7c215805e9e51c93cea.zip
gcc-1877be457e46e1f22414e7c215805e9e51c93cea.tar.gz
gcc-1877be457e46e1f22414e7c215805e9e51c93cea.tar.bz2
re PR c/7344 (performance regression on huge case statements)
* i386.md (movv2di_internal): New pattern. (movv2df_internal, movv8hi_internal, movv16qi_internal): Fix predicate. (movv2di): New expander. * i386.c (ix86_preferred_reload_class): Return NO_REGS for vector operands. * i386.c (ix86_expand_timode_binop_builtin): Delete. (builtin_description): Add SSE1 logicals; rename SSE2 logicals. (ix86_init_mmx_sse_builtins): Kill SSE1 logicals. (ix86_expand_builtin): Likewise. * i386.h (sse_andti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2, sse_andti3, sse_andnti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2, sse_andnti3, sse_orti4_df_1, sse_orti3_df_2, sse_orti3_sf_1, sse_orti3_sf_2, sse_orti3, sse_xorti4_df_1, sse_xorti3_df_2, sse_xorti3_sf_1, sse_xorti3_sf_2, sse_xorti3): Kill. (sse_andv4sf3, sse_andnv4sf3, sse_orv2df3, sse_xorv2df3, sse_andv2df3, sse_andnv2df3, sse_orv2df3, sse_xorv2df3): New expanders. (*sse_andv4sf3, *sse_andnv2df3, *sse_orv4sf3, *sse_xorv4sf3, *sse_andv2df3, *sse_andnv2df3, *sse_orv2df3, *sse_xorv2df3): New patterns. (*sse_andsf3, *sse_andndf3, *sse_ordf3, *sse_xordf3, *sse_anddf3, *sse_andndf3, *sse_orv2df3, *sse_xorv2df3): New patterns. * xmmintrin.h (__m128i): Define as __v2di. PR c/7344 * predict.c (can_predict_insn_p): New function. (estimate_probability): Avoid unnecessary work. (process_note_prediction): Likewise. * toplev.c (rest_of_compilation): Account early branch prediction pass as TV_BRANCH_PROB. PR c++/6419 (expand_expr): Use DECL_RTL_SET_P. From-SVN: r58156
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/i386.c74
-rw-r--r--gcc/config/i386/i386.md422
-rw-r--r--gcc/config/i386/xmmintrin.h2
3 files changed, 252 insertions, 246 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3ef4848..82b22dc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -771,8 +771,6 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
-static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
- tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
@@ -11811,6 +11809,11 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
{ MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
+ { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
+ { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
+ { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
+ { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
+
{ MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
{ MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
{ MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
@@ -11935,10 +11938,10 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
{ MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
- { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
+ { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
@@ -12443,11 +12446,6 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
- def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
-
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
@@ -12680,45 +12678,6 @@ ix86_expand_binop_builtin (icode, arglist, target)
return target;
}
-/* In type_for_mode we restrict the ability to create TImode types
- to hosts with 64-bit H_W_I. So we've defined the SSE logicals
- to have a V4SFmode signature. Convert them in-place to TImode. */
-
-static rtx
-ix86_expand_timode_binop_builtin (icode, arglist, target)
- enum insn_code icode;
- tree arglist;
- rtx target;
-{
- rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-
- op0 = gen_lowpart (TImode, op0);
- op1 = gen_lowpart (TImode, op1);
- target = gen_reg_rtx (TImode);
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
- op0 = copy_to_mode_reg (TImode, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
- op1 = copy_to_mode_reg (TImode, op1);
-
- /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
- yet one of the two must not be a memory. This is normally enforced
- by expanders, but we didn't bother to create one here. */
- if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
- op0 = copy_to_mode_reg (TImode, op0);
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
-
- return gen_lowpart (V4SFmode, target);
-}
-
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
@@ -13064,19 +13023,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
- case IX86_BUILTIN_ANDPS:
- return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
- arglist, target);
- case IX86_BUILTIN_ANDNPS:
- return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
- arglist, target);
- case IX86_BUILTIN_ORPS:
- return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
- arglist, target);
- case IX86_BUILTIN_XORPS:
- return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
- arglist, target);
-
case IX86_BUILTIN_LOADAPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
@@ -13553,6 +13499,8 @@ ix86_preferred_reload_class (x, class)
rtx x;
enum reg_class class;
{
+ if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
+ return NO_REGS;
if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{
/* SSE can't load any constant directly yet. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index dd174fe..efa84c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17823,6 +17823,15 @@
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+;; Aligned 128-bit move of a V2DI value between an SSE register and
+;; an SSE register or memory.
+;; NOTE(review): movdqa is an SSE2 instruction while the condition below is
+;; only TARGET_SSE -- confirm whether this should be TARGET_SSE2.
+(define_insn "movv2di_internal"
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
+ "TARGET_SSE"
+ ;; @@@ let's try to use movaps here.
+ "movdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
+
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
@@ -17869,7 +17878,7 @@
(define_insn "movv2df_internal"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V2DF 1 "general_operand" "xm,x"))]
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
;; @@@ let's try to use movaps here.
"movapd\t{%1, %0|%0, %1}"
@@ -17878,7 +17887,7 @@
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V8HI 1 "general_operand" "xm,x"))]
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17887,7 +17896,7 @@
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V16QI 1 "general_operand" "xm,x"))]
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17933,12 +17942,21 @@
(define_expand "movv4si"
[(set (match_operand:V4SI 0 "general_operand" "")
(match_operand:V4SI 1 "general_operand" ""))]
- "TARGET_MMX"
+ "TARGET_SSE"
{
ix86_expand_vector_move (V4SImode, operands);
DONE;
})
+(define_expand "movv2di"
+ [(set (match_operand:V2DI 0 "general_operand" "")
+ (match_operand:V2DI 1 "general_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (V2DImode, operands);
+ DONE;
+})
+
(define_expand "movv2si"
[(set (match_operand:V2SI 0 "general_operand" "")
(match_operand:V2SI 1 "general_operand" ""))]
@@ -18455,236 +18473,313 @@
;; SSE logical operations.
+;; SSE defines logical operations on floating point values. This poses an
+;; interesting challenge for the RTL representation, where logicals are only
+;; valid on integral types. We deal with this by representing the floating
+;; point logical as a logical on arguments cast to TImode, as this is what the
+;; hardware really does. Unfortunately the hardware requires the type
+;; information to be present, and thus we must prevent the subregs from being
+;; simplified and eliminated in later compilation phases.
+;;
+;; We have the following variants of each instruction:
+;; sse_andsf3 - the operation taking V4SF vector operands
+;; and doing TImode cast on them
+;; *sse_andsf3_memory - the operation taking one memory operand cast to
+;; TImode, since the backend insists on eliminating casts
+;; on memory operands
+;; sse_andti3_sf_1 - the operation taking SF scalar operands.
+;; We cannot accept a memory operand here as the instruction reads
+;; the whole scalar. This is generated only post reload by GCC
+;; scalar float operations that expand to logicals (fabs)
+;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
+;; memory operand. Eventually combine may be able
+;; to synthesize these using a splitter.
+;; sse2_anddf3, *sse2_anddf3_memory
+;;
+;;
;; These are not called andti3 etc. because we really really don't want
;; the compiler to widen DImode ands to TImode ands and then try to move
;; into DImode subregs of SSE registers, and them together, and move out
;; of DImode subregs again!
+;; SSE1 single precision floating point logical operation
+(define_expand "sse_andv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE"
+ "")
-(define_insn "*sse_andti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
+(define_insn "*sse_andv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_df_2"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
+(define_insn "*sse_andsf3"
+ [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_sf_1"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
+(define_expand "sse_nandv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0))
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
"TARGET_SSE"
- "andps\t{%2, %0|%0, %2}"
+ "")
+
+(define_insn "*sse_nandv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_sf_2"
+(define_insn "*sse_nandsf3"
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
- "andps\t{%2, %0|%0, %2}"
+ "andnps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
-(define_insn "sse_andti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+(define_expand "sse_iorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE"
+ "")
+
+(define_insn "*sse_iorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+ "TARGET_SSE
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "andps\t{%2, %0|%0, %2}"
+ "orps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
-(define_insn "sse2_andti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+(define_insn "*sse_iorsf3"
+ [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2
+ "TARGET_SSE
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "pand\t{%2, %0|%0, %2}"
+ "orps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "V4SF")])
-(define_insn "sse2_andv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2
+(define_expand "sse_xorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "pand\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "TI")])
-
-(define_insn "*sse_nandti3_df"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
- "TARGET_SSE2"
- "andnpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ "")
-(define_insn "*sse_nandti3_sf"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0))
+(define_insn "*sse_xorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xorps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
-(define_insn "sse_nandti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+(define_insn "*sse_xorsf3"
+ [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2"
- "andnps\t{%2, %0|%0, %2}"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xorps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
-(define_insn "sse2_nandti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
- (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+;; SSE2 double precision floating point logical operation
+
+(define_expand "sse2_andv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
"TARGET_SSE2"
- "pandn\t{%2, %0|%0, %2}"
+ "")
+
+(define_insn "*sse2_andv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "V2DF")])
-(define_insn "sse2_nandv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
- (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+;; Scalar DF variant of the andpd pattern: the DF destination is written
+;; through a TImode subreg.  Named to parallel *sse2_iordf3 / *sse2_xordf3.
+(define_insn "*sse2_anddf3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "pandn\t{%2, %0|%0, %2}"
+ "andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "TI")])
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
+(define_expand "sse2_nandv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0))
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
"TARGET_SSE2"
- "orpd\t{%2, %0|%0, %2}"
+ "")
+
+(define_insn "*sse2_nandv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_df_2"
+(define_insn "*sse_nandti3_df"
[(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
"TARGET_SSE2"
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "sse2_iorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_iorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"orpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_sf_1"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
+(define_insn "*sse2_iordf3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "orpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_sf_2"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
+;; Expander for the V2DF xor: both inputs and the output are viewed as TImode
+;; through subregs.  Operand 1 is required to be a register, matching the
+;; sibling and/nand/ior expanders, so that at most one input can be memory.
+(define_expand "sse2_xorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_xorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xorpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2DF")])
-(define_insn "sse_iorti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+(define_insn "*sse2_xordf3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+ "TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "orps\t{%2, %0|%0, %2}"
+ "xorpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2DF")])
-(define_insn "sse2_iorti3"
+;; SSE2 integral logicals. These patterns must always come after the floating
+;; point ones since we don't want the compiler to use integer opcodes on floating
+;; point SSE values to avoid matching of subregs in the match_operand.
+(define_insn "*sse2_andti3"
[(set (match_operand:TI 0 "register_operand" "=x")
- (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "por\t{%2, %0|%0, %2}"
+ "pand\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "TI")])
-(define_insn "sse2_iorv2di3"
+(define_insn "sse2_andv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "por\t{%2, %0|%0, %2}"
+ "pand\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "TI")])
-(define_insn "*sse_xorti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
- "TARGET_SSE2"
- "xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*sse_xorti3_df_2"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
+(define_insn "*sse2_nandti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "xorpd\t{%2, %0|%0, %2}"
+ "pandn\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "TI")])
-(define_insn "*sse_xorti3_sf_1"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
+(define_insn "sse2_nandv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "pandn\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "TI")])
-(define_insn "*sse_xorti3_sf_2"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
+(define_insn "*sse2_iorti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "por\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "TI")])
-(define_insn "sse_xorti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
- (match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+(define_insn "sse2_iorv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "xorps\t{%2, %0|%0, %2}"
+ "por\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "TI")])
-(define_insn "sse2_xorti3"
+(define_insn "*sse2_xorti3"
[(set (match_operand:TI 0 "register_operand" "=x")
(xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
@@ -18889,7 +18984,6 @@
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
-
;; SSE <-> integer/MMX conversions
(define_insn "cvtpi2ps"
@@ -20264,42 +20358,6 @@
"minsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
-
-(define_insn "sse2_anddf3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (subreg:V2DF (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_nanddf3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "0") 0))
- (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
- "TARGET_SSE2"
- "andnpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_iordf3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (subreg:V2DF (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
- "TARGET_SSE2"
- "orpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_xordf3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (subreg:V2DF (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))]
- "TARGET_SSE2"
- "xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sselog")
- (set_attr "mode" "V2DF")])
;; SSE2 square root. There doesn't appear to be an extension for the
;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 9442e96..fcf73ca 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1066,7 +1066,7 @@ typedef int __v4si __attribute__ ((mode (V4SI)));
typedef int __v8hi __attribute__ ((mode (V8HI)));
typedef int __v16qi __attribute__ ((mode (V16QI)));
-#define __m128i __m128
+#define __m128i __v2di
#define __m128d __v2df
static __inline __m128d