aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Richard Henderson <rth@redhat.com> 2004-12-22 23:49:24 -0800
committer Richard Henderson <rth@gcc.gnu.org> 2004-12-22 23:49:24 -0800
commit 1c47af84a30650b28b6d1957f1689e2107396a2e (patch)
tree 63ed3848efe9dfc80d70dfe0829b304716de04c8 /gcc/config
parent 5e5f01b969fcc0449e7826dae1343f10bfca69e8 (diff)
downloadgcc-1c47af84a30650b28b6d1957f1689e2107396a2e.zip
gcc-1c47af84a30650b28b6d1957f1689e2107396a2e.tar.gz
gcc-1c47af84a30650b28b6d1957f1689e2107396a2e.tar.bz2
emmintrin.h (_mm_loadh_pd): Don't cast pointer arg to __v2si.
* config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg to __v2si.
(_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd, __builtin_ia32_storehpd, __builtin_ia32_storelpd.
(ix86_expand_builtin): Update to match.
(ix86_expand_vector_init): Use sse2_loadlpd.
* config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd.
(vec_extractv2df): Use sse2_storelpd, sse2_storehpd.
(sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New.
(sse2_movhpd): Remove.

From-SVN: r92536
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/emmintrin.h 8
-rw-r--r-- gcc/config/i386/i386.c 77
-rw-r--r-- gcc/config/i386/i386.md 138
3 files changed, 150 insertions, 73 deletions
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 67450e4..49c6a7f 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -937,25 +937,25 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
static __inline __m128d
_mm_loadh_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
+ return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}
static __inline void
_mm_storeh_pd (double *__A, __m128d __B)
{
- __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
+ __builtin_ia32_storehpd (__A, (__v2df)__B);
}
static __inline __m128d
_mm_loadl_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
+ return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}
static __inline void
_mm_storel_pd (double *__A, __m128d __B)
{
- __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
+ __builtin_ia32_storelpd (__A, (__v2df)__B);
}
static __inline int
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ef5f37e..2a9dca2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12551,12 +12551,9 @@ ix86_init_mmx_sse_builtins (void)
V2DF_type_node, V2DF_type_node,
integer_type_node,
NULL_TREE);
- tree v2df_ftype_v2df_pv2si
+ tree v2df_ftype_v2df_pcdouble
= build_function_type_list (V2DF_type_node,
- V2DF_type_node, pv2si_type_node, NULL_TREE);
- tree void_ftype_pv2si_v2df
- = build_function_type_list (void_type_node,
- pv2si_type_node, V2DF_type_node, NULL_TREE);
+ V2DF_type_node, pcdouble_type_node, NULL_TREE);
tree void_ftype_pdouble_v2df
= build_function_type_list (void_type_node,
pdouble_type_node, V2DF_type_node, NULL_TREE);
@@ -12858,10 +12855,10 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
@@ -13405,8 +13402,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_LOADLPD:
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
- : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
- : CODE_FOR_sse2_movsd);
+ : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
+ : CODE_FOR_sse2_loadlpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13430,12 +13427,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_STOREHPS:
case IX86_BUILTIN_STORELPS:
- case IX86_BUILTIN_STOREHPD:
- case IX86_BUILTIN_STORELPD:
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
- : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
- : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
- : CODE_FOR_sse2_movsd);
+ : CODE_FOR_sse_movlps);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13451,7 +13444,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (! pat)
return 0;
emit_insn (pat);
- return 0;
+ return const0_rtx;
+
+ case IX86_BUILTIN_STOREHPD:
+ case IX86_BUILTIN_STORELPD:
+ icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
+ : CODE_FOR_sse2_storelpd);
+ arg0 = TREE_VALUE (arglist);
+ arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return const0_rtx;
case IX86_BUILTIN_MOVNTPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
@@ -15189,24 +15203,29 @@ ix86_expand_vector_init (rtx target, rtx vals)
/* ... values where only first field is non-constant are best loaded
from the pool and overwritten via move later. */
- if (!i)
+ if (i == 0)
{
- rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
- GET_MODE_INNER (mode), 0);
-
- op = force_reg (mode, op);
XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+
switch (GET_MODE (target))
{
- case V2DFmode:
- emit_insn (gen_sse2_movsd (target, target, op));
- break;
- case V4SFmode:
+ case V2DFmode:
+ emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
+ break;
+
+ case V4SFmode:
+ {
+ /* ??? We can represent this better. */
+ rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
+ GET_MODE_INNER (mode), 0);
+ op = force_reg (mode, op);
emit_insn (gen_sse_movss (target, target, op));
- break;
- default:
- break;
+ }
+ break;
+
+ default:
+ break;
}
return;
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 648748c..ff0f9f9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4734,16 +4734,10 @@
switch (INTVAL (operands[2]))
{
case 0:
- emit_insn (gen_sse2_movsd (operands[0], operands[0],
- simplify_gen_subreg (V2DFmode, operands[1],
- DFmode, 0)));
+ emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
break;
case 1:
- {
- rtx op1 = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
-
- emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], op1));
- }
+ emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
break;
default:
abort ();
@@ -4760,14 +4754,10 @@
switch (INTVAL (operands[2]))
{
case 0:
- emit_move_insn (operands[0], gen_lowpart (DFmode, operands[1]));
+ emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
break;
case 1:
- {
- rtx dest = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-
- emit_insn (gen_sse2_unpckhpd (dest, operands[1], operands[1]));
- }
+ emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
break;
default:
abort ();
@@ -23731,17 +23721,103 @@
[(set_attr "type" "ssemov")
(set_attr "mode" "TI")])
-(define_insn "sse2_movhpd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (vec_merge:V2DF
- (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
- (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
- (const_int 1)))]
- "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
- "movhpd\t{%2, %0|%0, %2}"
+;; Store the high double of the source vector into the double destination.
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,Y,Y")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8));
+ DONE;
+})
+
+;; Load the high double of the target vector from the source scalar.
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")))]
+ "TARGET_SSE2"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]);
+ DONE;
+})
+
+;; Store the low double of the source vector into the double destination.
+(define_expand "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2"
+{
+ operands[1] = gen_lowpart (DFmode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+;; Load the low double of the target vector from the source scalar.
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+ "@
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+;; Merge the low part of the source vector into the low part of the target.
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
+ (vec_merge:V2DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y")
+ (const_int 2)))]
+ "TARGET_SSE2"
+ "@movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF,V2DF,V2DF")])
+
(define_expand "sse2_loadsd"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:DF 1 "memory_operand" "")]
@@ -23763,24 +23839,6 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
-;; ??? We ought to be using ix86_binary_operator_ok on this pattern, so
-;; that we enforce the whole matching memory thing through combine et al.
-;; But that requires that things be set up properly when invoked via an
-;; intrinsic, which we don't do. Which leads to instantiate virtual regs
-;; lossage, as seen compiling gcc.dg/i386-sse-2.c for x86_64 at -O0.
-(define_insn "sse2_movsd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
- (vec_merge:V2DF
- (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
- (match_operand:V2DF 2 "nonimmediate_operand" "x,m,x")
- (const_int 2)))]
- "TARGET_SSE2"
- "@movsd\t{%2, %0|%0, %2}
- movlpd\t{%2, %0|%0, %2}
- movlpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "ssecvt")
- (set_attr "mode" "DF,V2DF,V2DF")])
-
(define_insn "sse2_storesd"
[(set (match_operand:DF 0 "memory_operand" "=m")
(vec_select:DF