aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2003-02-22 03:09:06 +0100
committerJan Hubicka <hubicka@gcc.gnu.org>2003-02-22 02:09:06 +0000
commit453ee2313a9f013158f1c5ab8bd97cf495c5c270 (patch)
treec74c0b1a2b314570d121c6928bed7609ac436b20 /gcc
parentd50672efa744b4492ca692e4cffcd7c9cdd55e37 (diff)
downloadgcc-453ee2313a9f013158f1c5ab8bd97cf495c5c270.zip
gcc-453ee2313a9f013158f1c5ab8bd97cf495c5c270.tar.gz
gcc-453ee2313a9f013158f1c5ab8bd97cf495c5c270.tar.bz2
i386.c (def_builtin): Special case 64bit builtins.
* i386.c (def_builtin): Special case 64bit builtins. (MASK_SSE164, MASK_SSE264): New constants. (builtin_description): Add 64bit builtins. (ix86_init_mmx_sse_builtins): Likewise. * i386.h (enum ix86_builtins): Likewise. * i386.md (cvtss2siq, cvttss2siq, cvtsd2siq, cvttsd2siq, cvtsi2sdq, sse2_movq2dq_rex64, sse2_movsq2q_rex64): New. (sse2_movq2dq, sse2_movsq2q): Disable for 64bit. * mmintrin.h (_mm_cvtsi64x_si64, _mm_set_pi64x, _mm_cvtsi64_si64x): New. * xmmintrin.h (_mm_cvtss_si64x, _mm_cvttss_si64x, _mm_cvtsi64x_ss, _mm_set_epi64x, _mm_set1_epi64x, _mm_cvtsd_si64x, _mm_cvttsd_si64x, _mm_cvtsi64x_sd, _mm_cvtsi64x_si128, _mm_cvtsi128_si64x): New. From-SVN: r63267
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/config/i386/i386.c31
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/i386.md83
-rw-r--r--gcc/config/i386/mmintrin.h25
-rw-r--r--gcc/config/i386/xmmintrin.h95
6 files changed, 252 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 993fe0c..746458b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+Sat Feb 22 02:35:07 CET 2003 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (def_builtin): Special case 64bit builtins.
+ (MASK_SSE164, MASK_SSE264): New constants.
+ (builtin_description): Add 64bit builtins.
+ (ix86_init_mmx_sse_builtins): Likewise.
+ * i386.h (enum ix86_builtins): Likewise.
+ * i386.md (cvtss2siq, cvttss2siq, cvtsd2siq, cvttsd2siq, cvtsi2sdq,
+ sse2_movq2dq_rex64, sse2_movsq2q_rex64): New.
+ (sse2_movq2dq, sse2_movsq2q): Disable for 64bit.
+ * mmintrin.h (_mm_cvtsi64x_si64, _mm_set_pi64x, _mm_cvtsi64_si64x): New.
+ * xmmintrin.h (_mm_cvtss_si64x, _mm_cvttss_si64x, _mm_cvtsi64x_ss,
+ _mm_set_epi64x, _mm_set1_epi64x, _mm_cvtsd_si64x, _mm_cvttsd_si64x,
+ _mm_cvtsi64x_sd, _mm_cvtsi64x_si128, _mm_cvtsi128_si64x): New.
+
Sat Feb 22 00:48:22 CET 2003 Jan Hubicka <jh@suse.cz>
* i386.c (builtin_description): Add __builtin_ia32_paddq and
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c90878..e18b1fb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12688,7 +12688,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt)
#define def_builtin(MASK, NAME, TYPE, CODE) \
do { \
- if ((MASK) & target_flags) \
+ if ((MASK) & target_flags \
+ && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
NULL, NULL_TREE); \
} while (0)
@@ -12705,6 +12706,8 @@ struct builtin_description
/* Used for builtins that are enabled both by -msse and -msse2. */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
+#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
static const struct builtin_description bdesc_comi[] =
{
@@ -12840,6 +12843,7 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
{ MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
@@ -12990,6 +12994,7 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
@@ -13005,8 +13010,10 @@ static const struct builtin_description bdesc_1arg[] =
{ MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
{ MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+ { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
@@ -13028,6 +13035,8 @@ static const struct builtin_description bdesc_1arg[] =
{ MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+ { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
{ MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
@@ -13073,11 +13082,18 @@ ix86_init_mmx_sse_builtins ()
tree int_ftype_v4sf
= build_function_type_list (integer_type_node,
V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
tree int_ftype_v8qi
= build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
tree v4sf_ftype_v4sf_int
= build_function_type_list (V4SF_type_node,
V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2si
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2SI_type_node, NULL_TREE);
@@ -13228,9 +13244,16 @@ ix86_init_mmx_sse_builtins ()
= build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
tree int_ftype_v2df
= build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
tree v2df_ftype_v2df_int
= build_function_type_list (V2DF_type_node,
V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
tree v4sf_ftype_v4sf_v2df
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V2DF_type_node, NULL_TREE);
@@ -13433,9 +13456,12 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
@@ -13553,12 +13579,15 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+ def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7e0c621..828cf42 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2115,9 +2115,12 @@ enum ix86_builtins
IX86_BUILTIN_CVTPI2PS,
IX86_BUILTIN_CVTPS2PI,
IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
IX86_BUILTIN_CVTTPS2PI,
IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
IX86_BUILTIN_MAXPS,
IX86_BUILTIN_MAXSS,
@@ -2376,11 +2379,14 @@ enum ix86_builtins
IX86_BUILTIN_CVTPI2PD,
IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
IX86_BUILTIN_CVTSD2SS,
IX86_BUILTIN_CVTSS2SD,
IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
IX86_BUILTIN_CVTPS2DQ,
IX86_BUILTIN_CVTPS2PD,
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c359ee..9449180 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20223,6 +20223,17 @@
(set_attr "athlon_decode" "double,vector")
(set_attr "mode" "SF")])
+(define_insn "cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "mode" "SF")])
+
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(vec_select:SI
@@ -20235,6 +20246,18 @@
(set_attr "mode" "SF")
(set_attr "athlon_decode" "double,vector")])
+(define_insn "cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+ UNSPEC_FIX)
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "double,vector")])
+
;; MMX insns
@@ -21835,6 +21858,15 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "SI")])
+(define_insn "cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SI")])
+
(define_insn "cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
@@ -21845,6 +21877,16 @@
(set_attr "mode" "SI")
(set_attr "athlon_decode" "double,vector")])
+(define_insn "cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+ (parallel [(const_int 0)]))] UNSPEC_FIX))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "double,vector")])
+
(define_insn "cvtsi2sd"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
@@ -21858,6 +21900,19 @@
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")])
+(define_insn "cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+ (vec_duplicate:V2DF
+ (float:DF
+ (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 2)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")])
+
;; Conversions between SF and DF
(define_insn "cvtsd2ss"
@@ -22794,24 +22849,48 @@
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
(parallel [(const_int 0)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
+(define_insn "sse2_movdq2q_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+ (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_movq2dq"
[(set (match_operand:V2DI 0 "register_operand" "=x,?x")
(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
(const_int 0)))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt,ssemov")
(set_attr "mode" "TI")])
+(define_insn "sse2_movq2dq_rex64"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+ (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+ (const_int 0)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssemov,ssecvt")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_movq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI (vec_select:DI
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 21cbf7f..7b4aa01 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -56,6 +56,22 @@ _mm_cvtsi32_si64 (int __i)
return (__m64) __tmp;
}
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+static __inline __m64
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Convert I to a __m64 object. */
+static __inline __m64
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
/* Convert the lower 32 bits of the __m64 object into an integer. */
static __inline int
_mm_cvtsi64_si32 (__m64 __i)
@@ -64,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i)
return __tmp;
}
+#ifdef __x86_64__
+/* Convert the lower 32 bits of the __m64 object into an integer. */
+static __inline long long
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
the result, and the four 16-bit values from M2 into the upper four 8-bit
values of the result, all with signed saturation. */
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 1e07ec6..48004f6 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A)
return __builtin_ia32_cvtss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+ rounding mode. */
+static __inline long long
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Convert the two lower SPFP values to 32-bit integers according to the
current rounding mode. Return the integers in packed form. */
static __inline __m64
@@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A)
return __builtin_ia32_cvttss2si ((__v4sf) __A);
}
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 32-bit integer. */
+static __inline long long
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
/* Truncate the two lower SPFP values to 32-bit integers. Return the
integers in packed form. */
static __inline __m64
@@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B)
return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+static __inline __m128
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
/* Convert the two 32-bit values in B to SPFP form and insert them
as the two lower elements in A. */
static __inline __m128
@@ -1662,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
return __u.__v;
}
+
+#ifdef __x86_64__
+/* Create the vector [Z Y]. */
+static __inline __m128i
+_mm_set_epi64x (long long __Z, long long __Y)
+{
+ union {
+ long __a[2];
+ __m128i __v;
+ } __u;
+
+ __u.__a[0] = __Y;
+ __u.__a[1] = __Z;
+
+ return __u.__v;
+}
+#endif
+
/* Create the vector [S T U V Z Y X W]. */
static __inline __m128i
_mm_set_epi16 (short __Z, short __Y, short __X, short __W,
@@ -1730,6 +1776,15 @@ _mm_set1_epi32 (int __A)
return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_set1_epi64x (long long __A)
+{
+ __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+ return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+}
+#endif
+
static __inline __m128i
_mm_set1_epi16 (short __A)
{
@@ -1899,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A)
return __builtin_ia32_cvtsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline int
_mm_cvttsd_si32 (__m128d __A)
{
return __builtin_ia32_cvttsd2si ((__v2df) __A);
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
static __inline __m128
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
@@ -1917,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}
+#ifdef __x86_64__
+static __inline __m128d
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
static __inline __m128d
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
@@ -2465,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A)
return (__m128i) __builtin_ia32_loadd (&__A);
}
+#ifdef __x86_64__
+static __inline __m128i
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return (__m128i) __builtin_ia32_movq2dq (__A);
+}
+#endif
+
static __inline int
_mm_cvtsi128_si32 (__m128i __A)
{
@@ -2473,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A)
return __tmp;
}
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
#endif /* __SSE2__ */
#endif /* __SSE__ */