diff options
-rw-r--r-- | gcc/ChangeLog | 40 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 74 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 422 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 2 | ||||
-rw-r--r-- | gcc/expr.c | 5 | ||||
-rw-r--r-- | gcc/final.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/i386-ssetype-1.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/i386-ssetype-2.c | 40 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/i386-ssetype-3.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/i386-ssetype-4.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/i386-ssetype-5.c | 33 |
12 files changed, 475 insertions, 249 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6238c6e..d045018 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +Mon Oct 14 20:33:12 CEST 2002 Jan Hubicka <jh@suse.cz> + + * i386.md (movv2di_internal): New pattern. + (movv2df_internal, movv8hi_internal, movv16qi_internal): Fix predicate. + (movv2di): New expander. + * i386.c (ix86_preferred_reload_class): Return NO_REGS for vector operands. + + * i386.c (ix86_expand_timode_binop_builtin): Delete. + (builtin_description): Add SSE1 logicals; rename SSE2 logicals. + (ix86_init_mmx_sse_builtins): Kill SSE1 logicals. + (ix86_expand_builtin): Likewise. + * i386.h (sse_andti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2, + sse_andti3, + sse_andnti4_df_1, sse_andti3_df_2, sse_andti3_sf_1, sse_andti3_sf_2, + sse_andnti3, + sse_orti4_df_1, sse_orti3_df_2, sse_orti3_sf_1, sse_orti3_sf_2, + sse_orti3, + sse_xorti4_df_1, sse_xorti3_df_2, sse_xorti3_sf_1, sse_xorti3_sf_2, + sse_xorti3): Kill. + (sse_andv4sf3, sse_andnv4sf3, sse_orv2df3, sse_xorv2df3, sse_andv2df3, + sse_andnv2df3, sse_orv2df3, sse_xorv2df3): New expanders. + (*sse_andv4sf3, *sse_andnv2df3, *sse_orv4sf3, *sse_xorv4sf3, *sse_andv2df3, + *sse_andnv2df3, *sse_orv2df3, *sse_xorv2df3): New patterns. + (*sse_andsf3, *sse_andndf3, *sse_ordf3, *sse_xordf3, *sse_anddf3, + *sse_andndf3, *sse_orv2df3, *sse_xorv2df3): New patterns. + + * xmmintrin.h (__m128i): Define as __v2di. + + PR c/7344 + * predict.c (can_predict_insn_p): New function. + (estimate_probability): Avoid unnecesary work. + (process_note_prediction): Likewise. + * toplev.c (rest_of_compilation): Account early branch prediction pass + as TV_BRANCH_PROB. + + PR c++/6419 + (expand_expr): Use DECL_RTL_SET_P. + 2002-10-14 Roger Sayle <roger@eyesopen.com> * combine.c (simplify_set): Treat MODE_CC registers like cc0. @@ -136,7 +174,7 @@ Fri Oct 11 22:22:38 CEST 2002 Jan Hubicka <jh@suse.cz> PR c/7344 * cfgbuild.c (make_edges): Create edge cache when we do have large jumptable. 
- (do_tablejump): Note size of maximal jumptable. + * expr.c (do_tablejump): Note size of maximal jumptable. * function.c (prepare_function_start): Zero out size. * function.h (function): Add max_jumptable_ents. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3ef4848..82b22dc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -771,8 +771,6 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *, static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx)); static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int)); static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx)); -static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code, - tree, rtx)); static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree)); static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode)); static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code)); @@ -11811,6 +11809,11 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 }, { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 }, + { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 }, { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 }, { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 }, @@ -11935,10 +11938,10 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_vmsminv2df3, 
"__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 }, { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 }, @@ -12443,11 +12446,6 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); - def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS); - def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS); - def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS); - def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS); - def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); @@ -12680,45 +12678,6 @@ ix86_expand_binop_builtin (icode, arglist, target) return target; } -/* In type_for_mode we 
restrict the ability to create TImode types - to hosts with 64-bit H_W_I. So we've defined the SSE logicals - to have a V4SFmode signature. Convert them in-place to TImode. */ - -static rtx -ix86_expand_timode_binop_builtin (icode, arglist, target) - enum insn_code icode; - tree arglist; - rtx target; -{ - rtx pat; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); - rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); - - op0 = gen_lowpart (TImode, op0); - op1 = gen_lowpart (TImode, op1); - target = gen_reg_rtx (TImode); - - if (! (*insn_data[icode].operand[1].predicate) (op0, TImode)) - op0 = copy_to_mode_reg (TImode, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, TImode)) - op1 = copy_to_mode_reg (TImode, op1); - - /* In the commutative cases, both op0 and op1 are nonimmediate_operand, - yet one of the two must not be a memory. This is normally enforced - by expanders, but we didn't bother to create one here. */ - if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM) - op0 = copy_to_mode_reg (TImode, op0); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - - return gen_lowpart (V4SFmode, target); -} - /* Subroutine of ix86_expand_builtin to take care of stores. 
*/ static rtx @@ -13064,19 +13023,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_RCPSS: return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target); - case IX86_BUILTIN_ANDPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3, - arglist, target); - case IX86_BUILTIN_ANDNPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3, - arglist, target); - case IX86_BUILTIN_ORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3, - arglist, target); - case IX86_BUILTIN_XORPS: - return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3, - arglist, target); - case IX86_BUILTIN_LOADAPS: return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1); @@ -13553,6 +13499,8 @@ ix86_preferred_reload_class (x, class) rtx x; enum reg_class class; { + if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x))) + return NO_REGS; if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) { /* SSE can't load any constant directly yet. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index dd174fe..efa84c9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17823,6 +17823,15 @@ [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_insn "movv2di_internal" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") + (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "movdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] @@ -17869,7 +17878,7 @@ (define_insn "movv2df_internal" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DF 1 "general_operand" "xm,x"))] + (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. 
"movapd\t{%1, %0|%0, %1}" @@ -17878,7 +17887,7 @@ (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") - (match_operand:V8HI 1 "general_operand" "xm,x"))] + (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17887,7 +17896,7 @@ (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (match_operand:V16QI 1 "general_operand" "xm,x"))] + (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" @@ -17933,12 +17942,21 @@ (define_expand "movv4si" [(set (match_operand:V4SI 0 "general_operand" "") (match_operand:V4SI 1 "general_operand" ""))] - "TARGET_MMX" + "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); DONE; }) +(define_expand "movv2di" + [(set (match_operand:V2DI 0 "general_operand" "") + (match_operand:V2DI 1 "general_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DImode, operands); + DONE; +}) + (define_expand "movv2si" [(set (match_operand:V2SI 0 "general_operand" "") (match_operand:V2SI 1 "general_operand" ""))] @@ -18455,236 +18473,313 @@ ;; SSE logical operations. +;; SSE defines logical operations on floating point values. This brings +;; interesting challenge to RTL representation where logicals are only valid +;; on integral types. We deal with this by representing the floating point +;; logical as logical on arguments casted to TImode as this is what hardware +;; really does. Unfortunately hardware requires the type information to be +;; present and thus we must avoid subregs from being simplified and elliminated +;; in later compilation phases. 
+;; +;; We have the following variants of each instruction: +;; sse_andsf3 - the operation taking V4SF vector operands +;; and doing TImode cast on them +;; *sse_andsf3_memory - the operation taking one memory operand cast to +;; TImode, since the backend insists on eliminating casts +;; on memory operands +;; sse_andti3_sf_1 - the operation taking SF scalar operands. +;; We cannot accept a memory operand here as the instruction reads +;; whole scalar. This is generated only post reload by GCC +;; scalar float operations that expand to logicals (fabs) +;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode +;; memory operand. Eventually combine may be able +;; to synthesize these using a splitter. +;; sse2_anddf3, *sse2_anddf3_memory +;; +;; ;; These are not called andti3 etc. because we really really don't want ;; the compiler to widen DImode ands to TImode ands and then try to move ;; into DImode subregs of SSE registers, and them together, and move out ;; of DImode subregs again! 
+;; SSE1 single precision floating point logical operation +(define_expand "sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") -(define_insn "*sse_andti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" +(define_insn "*sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" +(define_insn "*sse_andsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] +(define_expand "sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 
0) + (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_2" +(define_insn "*sse_nandsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" + "andnps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_expand "sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" + "orps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse2_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_insn "*sse_iorsf3" + 
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" + "orps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V4SF")]) -(define_insn "sse2_andv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 +(define_expand "sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) - -(define_insn "*sse_nandti3_df" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + "") -(define_insn "*sse_nandti3_sf" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)) +(define_insn "*sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse_nandti3" - 
[(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) +(define_insn "*sse_xorsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2" - "andnps\t{%2, %0|%0, %2}" + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V4SF")]) -(define_insn "sse2_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] +;; SSE2 double precision floating point logical operation + +(define_expand "sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "pandn\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse2_nandv2di3" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE 
(operands[2]) != MEM)" - "pandn\t{%2, %0|%0, %2}" + "andpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] +(define_expand "sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" + "") + +(define_insn "*sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_2" +(define_insn "*sse_nandti3_df" [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "orpd\t{%2, %0|%0, 
%2}" [(set_attr "type" "sselog") (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" +(define_insn "*sse2_iordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "orpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_expand "sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_insn "*sse2_xordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 
+ "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" + "xorpd\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "V2DF")]) -(define_insn "sse2_iorti3" +;; SSE2 integral logicals. These patterns must always come after floating +;; point ones since we don't want compiler to use integer opcodes on floating +;; point SSE values to avoid matching of subregs in the match_operand. +(define_insn "*sse2_andti3" [(set (match_operand:TI 0 "register_operand" "=x") - (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" + "pand\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "TI")]) -(define_insn "sse2_iorv2di3" +(define_insn "sse2_andv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" + "pand\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "*sse_xorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] +(define_insn "*sse2_nandti3" + [(set 
(match_operand:TI 0 "register_operand" "=x") + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" + "pandn\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" +(define_insn "sse2_nandv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pandn\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_insn "*sse2_iorti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "por\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "sse_xorti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 +(define_insn "sse2_iorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 
"nonimmediate_operand" "xm")))] + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" + "por\t{%2, %0|%0, %2}" [(set_attr "type" "sselog") - (set_attr "mode" "V4SF")]) + (set_attr "mode" "TI")]) -(define_insn "sse2_xorti3" +(define_insn "*sse2_xorti3" [(set (match_operand:TI 0 "register_operand" "=x") (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] @@ -18889,7 +18984,6 @@ [(set_attr "type" "sse") (set_attr "mode" "SF")]) - ;; SSE <-> integer/MMX conversions (define_insn "cvtpi2ps" @@ -20264,42 +20358,6 @@ "minsd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "DF")]) - -(define_insn "sse2_anddf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_nanddf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "0") 0)) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_iordf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) - -(define_insn "sse2_xordf3" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (subreg:V2DF (xor:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "xm") 0)) 0))] - 
"TARGET_SSE2" - "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sselog") - (set_attr "mode" "V2DF")]) ;; SSE2 square root. There doesn't appear to be an extension for the ;; reciprocal/rsqrt instructions if the Intel manual is to be believed. diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 9442e96..fcf73ca 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1066,7 +1066,7 @@ typedef int __v4si __attribute__ ((mode (V4SI))); typedef int __v8hi __attribute__ ((mode (V8HI))); typedef int __v16qi __attribute__ ((mode (V16QI))); -#define __m128i __m128 +#define __m128i __v2di #define __m128d __v2df static __inline __m128d @@ -6532,7 +6532,7 @@ expand_expr (exp, target, tmode, modifier) } case PARM_DECL: - if (DECL_RTL (exp) == 0) + if (!DECL_RTL_SET_P (exp)) { error_with_decl (exp, "prior parameter's size depends on `%s'"); return CONST0_RTX (mode); @@ -10942,6 +10942,9 @@ do_tablejump (index, mode, range, table_label, default_label) { rtx temp, vector; + if (range > cfun->max_jumptable_ents) + cfun->max_jumptable_ents = range; + /* Do an unsigned comparison (in the proper mode) between the index expression and the value which represents the length of the range. Since we just finished subtracting the lower bound of the range diff --git a/gcc/final.c b/gcc/final.c index 2f5c823..5b11665 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -997,7 +997,7 @@ compute_alignments () align it. It is most likely an first block of loop. */ if (has_fallthru && branch_frequency + fallthru_frequency > BB_FREQ_MAX / 10 - && branch_frequency > fallthru_frequency * 5) + && branch_frequency > fallthru_frequency * 2) { log = LOOP_ALIGN (label); if (max_log < log) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6e6c3e2..c857013 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +Mon Oct 14 20:37:51 CEST 2002 Jan Hubicka <jh@suse.cz> + + * gcc.dg/i386-ssetype-[1-5].c: New tests. 
+ 2002-10-14 Richard Henderson <rth@redhat.com> * gcc.dg/20020219-1.c: Disable for 16-bit targets. diff --git a/gcc/testsuite/gcc.dg/i386-ssetype-1.c b/gcc/testsuite/gcc.dg/i386-ssetype-1.c new file mode 100644 index 0000000..e8411ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssetype-1.c @@ -0,0 +1,32 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2 -march=athlon" } */ +/* { dg-final { scan-assembler "andpd.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "andnpd.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "xorpd.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "orpd.*\[bs\]p" } } */ +/* { dg-final { scan-assembler-not "movdqa" } } */ +/* { dg-final { scan-assembler "movapd.*\[bs\]p" } } */ + +/* Verify that we generate proper instruction with memory operand. */ + +#include <xmmintrin.h> +__m128d +t1(__m128d a, __m128d b) +{ +return _mm_and_pd (a,b); +} +__m128d +t2(__m128d a, __m128d b) +{ +return _mm_andnot_pd (a,b); +} +__m128d +t3(__m128d a, __m128d b) +{ +return _mm_or_pd (a,b); +} +__m128d +t4(__m128d a, __m128d b) +{ +return _mm_xor_pd (a,b); +} diff --git a/gcc/testsuite/gcc.dg/i386-ssetype-2.c b/gcc/testsuite/gcc.dg/i386-ssetype-2.c new file mode 100644 index 0000000..377e173 --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssetype-2.c @@ -0,0 +1,40 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2 -march=athlon" } */ +/* { dg-final { scan-assembler "andpd" } } */ +/* { dg-final { scan-assembler "andnpd" } } */ +/* { dg-final { scan-assembler "xorpd" } } */ +/* { dg-final { scan-assembler "orpd" } } */ +/* { dg-final { scan-assembler-not "movdqa" } } */ +/* { dg-final { scan-assembler "movapd" } } */ + +/* Verify that we generate proper instruction without memory operand. 
*/ + +#include <xmmintrin.h> +__m128d +t1(__m128d a, __m128d b) +{ +a=_mm_sqrt_pd(a); +b=_mm_sqrt_pd(b); +return _mm_and_pd (a,b); +} +__m128d +t2(__m128d a, __m128d b) +{ +a=_mm_sqrt_pd(a); +b=_mm_sqrt_pd(b); +return _mm_andnot_pd (a,b); +} +__m128d +t3(__m128d a, __m128d b) +{ +a=_mm_sqrt_pd(a); +b=_mm_sqrt_pd(b); +return _mm_or_pd (a,b); +} +__m128d +t4(__m128d a, __m128d b) +{ +a=_mm_sqrt_pd(a); +b=_mm_sqrt_pd(b); +return _mm_xor_pd (a,b); +} diff --git a/gcc/testsuite/gcc.dg/i386-ssetype-3.c b/gcc/testsuite/gcc.dg/i386-ssetype-3.c new file mode 100644 index 0000000..1d1d7b0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssetype-3.c @@ -0,0 +1,32 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2 -march=athlon" } */ +/* { dg-final { scan-assembler "andps.*\[bs]p" } } */ +/* { dg-final { scan-assembler "andnps.*\[bs]p" } } */ +/* { dg-final { scan-assembler "xorps.*\[bs]p" } } */ +/* { dg-final { scan-assembler "orps.*\[bs]p" } } */ +/* { dg-final { scan-assembler-not "movdqa" } } */ +/* { dg-final { scan-assembler "movaps.*\[bs]p" } } */ + +/* Verify that we generate proper instruction with memory operand. 
*/ + +#include <xmmintrin.h> +__m128 +t1(__m128 a, __m128 b) +{ +return _mm_and_ps (a,b); +} +__m128 +t2(__m128 a, __m128 b) +{ +return _mm_andnot_ps (a,b); +} +__m128 +t3(__m128 a, __m128 b) +{ +return _mm_or_ps (a,b); +} +__m128 +t4(__m128 a, __m128 b) +{ +return _mm_xor_ps (a,b); +} diff --git a/gcc/testsuite/gcc.dg/i386-ssetype-4.c b/gcc/testsuite/gcc.dg/i386-ssetype-4.c new file mode 100644 index 0000000..ef6a93f --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssetype-4.c @@ -0,0 +1,38 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2 -march=athlon" } */ +/* { dg-final { scan-assembler "andps" } } */ +/* { dg-final { scan-assembler "andnps" } } */ +/* { dg-final { scan-assembler "xorps" } } */ +/* { dg-final { scan-assembler "orps" } } */ + +/* Verify that we generate proper instruction without memory operand. */ + +#include <xmmintrin.h> +__m128 +t1(__m128 a, __m128 b) +{ +a=_mm_sqrt_ps(a); +b=_mm_sqrt_ps(b); +return _mm_and_ps (a,b); +} +__m128 +t2(__m128 a, __m128 b) +{ +a=_mm_sqrt_ps(a); +b=_mm_sqrt_ps(b); +return _mm_andnot_ps (a,b); +} +__m128 +t3(__m128 a, __m128 b) +{ +a=_mm_sqrt_ps(a); +b=_mm_sqrt_ps(b); +return _mm_or_ps (a,b); +} +__m128 +t4(__m128 a, __m128 b) +{ +a=_mm_sqrt_ps(a); +b=_mm_sqrt_ps(b); +return _mm_xor_ps (a,b); +} diff --git a/gcc/testsuite/gcc.dg/i386-ssetype-5.c b/gcc/testsuite/gcc.dg/i386-ssetype-5.c new file mode 100644 index 0000000..97cc22f --- /dev/null +++ b/gcc/testsuite/gcc.dg/i386-ssetype-5.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2 -march=athlon" } */ +/* { dg-final { scan-assembler "pand.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "pandn.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "pxor.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "por.*\[bs\]p" } } */ +/* { dg-final { scan-assembler "movdqa" } } */ +/* { dg-final { scan-assembler-not "movaps.*\[bs\]p" } } */ + +/* Verify that we generate proper instruction with 
memory operand. */ + +#include <xmmintrin.h> +__m128i +t1(__m128i a, __m128i b) +{ +return _mm_and_si128 (a,b); +} +__m128i +t2(__m128i a, __m128i b) +{ +return _mm_andnot_si128 (a,b); +} +__m128i +t3(__m128i a, __m128i b) +{ +return _mm_or_si128 (a,b); +} +__m128i +t4(__m128i a, __m128i b) +{ +return _mm_xor_si128 (a,b); +} + |