From 453ee2313a9f013158f1c5ab8bd97cf495c5c270 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 22 Feb 2003 03:09:06 +0100 Subject: i386.c (def_builtin): Special case 64bit builtins. * i386.c (def_builtin): Special case 64bit builtins. (MASK_SSE164, MASK_SSE264): New constants. (builtin_description): Add 64bit builtins. (ix86_init_mmx_sse_builtins): Likewise. * i386.h (enum ix86_builtins): Likewise. * i386.md (cvtss2siq, cvttss2siq, cvtsd2siq, cvttsd2siq, cvtsi2sdq, sse2_movq2dq_rex64, sse2_movdq2q_rex64): New. (sse2_movq2dq, sse2_movdq2q): Disable for 64bit. * mmintrin.h (_mm_cvtsi64x_si64, _mm_set_pi64x, _mm_cvtsi64_si64x): New. * xmmintrin.h (_mm_cvtss_si64x, _mm_cvttss_si64x, _mm_cvtsi64x_ss, _mm_set_epi64x, _mm_set1_epi64x, _mm_cvtsd_si64x, _mm_cvttsd_si64x, _mm_cvtsi64x_sd, _mm_cvtsi64x_si128, _mm_cvtsi128_si64x): New. From-SVN: r63267 --- gcc/ChangeLog | 15 +++++++ gcc/config/i386/i386.c | 31 ++++++++++++++- gcc/config/i386/i386.h | 6 +++ gcc/config/i386/i386.md | 83 ++++++++++++++++++++++++++++++++++++++- gcc/config/i386/mmintrin.h | 25 ++++++++++++ gcc/config/i386/xmmintrin.h | 95 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 252 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 993fe0c..746458b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +Sat Feb 22 02:35:07 CET 2003 Jan Hubicka + + * i386.c (def_builtin): Special case 64bit builtins. + (MASK_SSE164, MASK_SSE264): New constants. + (builtin_description): Add 64bit builtins. + (ix86_init_mmx_sse_builtins): Likewise. + * i386.h (enum ix86_builtins): Likewise. + * i386.md (cvtss2siq, cvttss2siq, cvtsd2siq, cvttsd2siq, cvtsi2sdq, + sse2_movq2dq_rex64, sse2_movdq2q_rex64): New. + (sse2_movq2dq, sse2_movdq2q): Disable for 64bit. + * mmintrin.h (_mm_cvtsi64x_si64, _mm_set_pi64x, _mm_cvtsi64_si64x): New. 
+ * xmmintrin.h (_mm_cvtss_si64x, _mm_cvttss_si64x, _mm_cvtsi64x_ss, + _mm_set_epi64x, _mm_set1_epi64x, _mm_cvtsd_si64x, _mm_cvttsd_si64x, + _mm_cvtsi64x_sd, _mm_cvtsi64x_si128, _mm_cvtsi128_si64x): New. + Sat Feb 22 00:48:22 CET 2003 Jan Hubicka * i386.c (builtin_description): Add __builtin_ia32_paddq and diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 0c90878..e18b1fb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12688,7 +12688,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) #define def_builtin(MASK, NAME, TYPE, CODE) \ do { \ - if ((MASK) & target_flags) \ + if ((MASK) & target_flags \ + && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \ builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ NULL, NULL_TREE); \ } while (0) @@ -12705,6 +12706,8 @@ struct builtin_description /* Used for builtins that are enabled both by -msse and -msse2. */ #define MASK_SSE1 (MASK_SSE | MASK_SSE2) +#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT) +#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT) static const struct builtin_description bdesc_comi[] = { @@ -12840,6 +12843,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, @@ -12990,6 +12994,7 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } }; @@ -13005,8 +13010,10 @@ static const struct builtin_description 
bdesc_1arg[] = { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 }, { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }, + { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 }, @@ -13028,6 +13035,8 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 }, + { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, @@ -13073,11 +13082,18 @@ ix86_init_mmx_sse_builtins () tree int_ftype_v4sf = build_function_type_list (integer_type_node, V4SF_type_node, NULL_TREE); + tree int64_ftype_v4sf + = build_function_type_list (long_long_integer_type_node, + V4SF_type_node, NULL_TREE); tree int_ftype_v8qi = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf_int = build_function_type_list (V4SF_type_node, V4SF_type_node, integer_type_node, NULL_TREE); + tree v4sf_ftype_v4sf_int64 + = build_function_type_list (V4SF_type_node, + V4SF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2si = build_function_type_list (V4SF_type_node, V4SF_type_node, V2SI_type_node, NULL_TREE); @@ -13228,9 +13244,16 @@ ix86_init_mmx_sse_builtins () = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); tree int_ftype_v2df = build_function_type_list (integer_type_node, 
V2DF_type_node, NULL_TREE); + tree int64_ftype_v2df + = build_function_type_list (long_long_integer_type_node, + V2DF_type_node, NULL_TREE); tree v2df_ftype_v2df_int = build_function_type_list (V2DF_type_node, V2DF_type_node, integer_type_node, NULL_TREE); + tree v2df_ftype_v2df_int64 + = build_function_type_list (V2DF_type_node, + V2DF_type_node, long_long_integer_type_node, + NULL_TREE); tree v4sf_ftype_v4sf_v2df = build_function_type_list (V4SF_type_node, V4SF_type_node, V2DF_type_node, NULL_TREE); @@ -13433,9 +13456,12 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS); def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64); def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI); def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI); + def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW); def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW); @@ -13553,12 +13579,15 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI); def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64); + 
def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD); def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD); + def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD); def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS); def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7e0c621..828cf42 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2115,9 +2115,12 @@ enum ix86_builtins IX86_BUILTIN_CVTPI2PS, IX86_BUILTIN_CVTPS2PI, IX86_BUILTIN_CVTSI2SS, + IX86_BUILTIN_CVTSI642SS, IX86_BUILTIN_CVTSS2SI, + IX86_BUILTIN_CVTSS2SI64, IX86_BUILTIN_CVTTPS2PI, IX86_BUILTIN_CVTTSS2SI, + IX86_BUILTIN_CVTTSS2SI64, IX86_BUILTIN_MAXPS, IX86_BUILTIN_MAXSS, @@ -2376,11 +2379,14 @@ enum ix86_builtins IX86_BUILTIN_CVTPI2PD, IX86_BUILTIN_CVTSI2SD, + IX86_BUILTIN_CVTSI642SD, IX86_BUILTIN_CVTSD2SI, + IX86_BUILTIN_CVTSD2SI64, IX86_BUILTIN_CVTSD2SS, IX86_BUILTIN_CVTSS2SD, IX86_BUILTIN_CVTTSD2SI, + IX86_BUILTIN_CVTTSD2SI64, IX86_BUILTIN_CVTPS2DQ, IX86_BUILTIN_CVTPS2PD, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4c359ee..9449180 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20223,6 +20223,17 @@ (set_attr "athlon_decode" "double,vector") (set_attr "mode" "SF")]) +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + 
"cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SF")]) + (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r,r") (vec_select:SI @@ -20235,6 +20246,18 @@ (set_attr "mode" "SF") (set_attr "athlon_decode" "double,vector")]) +(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) + ;; MMX insns @@ -21835,6 +21858,15 @@ [(set_attr "type" "sseicvt") (set_attr "mode" "SI")]) +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SI")]) + (define_insn "cvttsd2si" [(set (match_operand:SI 0 "register_operand" "=r,r") (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") @@ -21845,6 +21877,16 @@ (set_attr "mode" "SI") (set_attr "athlon_decode" "double,vector")]) +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "double,vector")]) + (define_insn "cvtsi2sd" [(set (match_operand:V2DF 0 "register_operand" "=x,x") (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") @@ -21858,6 +21900,19 @@ (set_attr "mode" "DF") (set_attr "athlon_decode" "double,direct")]) +(define_insn "cvtsi2sdq" + [(set 
(match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) + ;; Conversions between SF and DF (define_insn "cvtsd2ss" @@ -22794,24 +22849,48 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") (parallel [(const_int 0)])))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movdq2q\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq2dq" [(set (match_operand:V2DI 0 "register_operand" "=x,?x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") (const_int 0)))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssemov") (set_attr "mode" "TI")]) +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_concat:V2DI (vec_select:DI diff --git a/gcc/config/i386/mmintrin.h 
b/gcc/config/i386/mmintrin.h index 21cbf7f..7b4aa01 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -56,6 +56,22 @@ _mm_cvtsi32_si64 (int __i) return (__m64) __tmp; } +#ifdef __x86_64__ +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_cvtsi64x_si64 (long long __i) +{ + return (__m64) __i; +} + +/* Convert I to a __m64 object. */ +static __inline __m64 +_mm_set_pi64x (long long __i) +{ + return (__m64) __i; +} +#endif + /* Convert the lower 32 bits of the __m64 object into an integer. */ static __inline int _mm_cvtsi64_si32 (__m64 __i) @@ -64,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i) return __tmp; } +#ifdef __x86_64__ +/* Convert the __m64 object to a 64-bit integer. */ +static __inline long long +_mm_cvtsi64_si64x (__m64 __i) +{ + return (long long)__i; +} +#endif + /* Pack the four 16-bit values from M1 into the lower four 8-bit values of the result, and the four 16-bit values from M2 into the upper four 8-bit values of the result, all with signed saturation. */ diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 1e07ec6..48004f6 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A) return __builtin_ia32_cvtss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Convert the lower SPFP value to a 64-bit integer according to the current + rounding mode. */ +static __inline long long +_mm_cvtss_si64x (__m128 __A) +{ + return __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} +#endif + /* Convert the two lower SPFP values to 32-bit integers according to the current rounding mode. Return the integers in packed form. */ static __inline __m64 @@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A) return __builtin_ia32_cvttss2si ((__v4sf) __A); } +#ifdef __x86_64__ +/* Truncate the lower SPFP value to a 64-bit integer. 
*/ +static __inline long long +_mm_cvttss_si64x (__m128 __A) +{ + return __builtin_ia32_cvttss2si64 ((__v4sf) __A); +} +#endif + /* Truncate the two lower SPFP values to 32-bit integers. Return the integers in packed form. */ static __inline __m64 @@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B) return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); } +#ifdef __x86_64__ +/* Convert B to a SPFP value and insert it as element zero in A. */ +static __inline __m128 +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} +#endif + /* Convert the two 32-bit values in B to SPFP form and insert them as the two lower elements in A. */ static __inline __m128 @@ -1662,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W) return __u.__v; } + +#ifdef __x86_64__ +/* Create the vector [Z Y]. */ +static __inline __m128i +_mm_set_epi64x (long long __Z, long long __Y) +{ + union { + long __a[2]; + __m128i __v; + } __u; + + __u.__a[0] = __Y; + __u.__a[1] = __Z; + + return __u.__v; +} +#endif + /* Create the vector [S T U V Z Y X W]. 
*/ static __inline __m128i _mm_set_epi16 (short __Z, short __Y, short __X, short __W, @@ -1730,6 +1776,15 @@ _mm_set1_epi32 (int __A) return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0)); } +#ifdef __x86_64__ +static __inline __m128i +_mm_set1_epi64x (long long __A) +{ + __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A); + return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0)); +} +#endif + static __inline __m128i _mm_set1_epi16 (short __A) { @@ -1899,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A) return __builtin_ia32_cvtsd2si ((__v2df) __A); } +#ifdef __x86_64__ +static __inline long long +_mm_cvtsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} +#endif + static __inline int _mm_cvttsd_si32 (__m128d __A) { return __builtin_ia32_cvttsd2si ((__v2df) __A); } +#ifdef __x86_64__ +static __inline long long +_mm_cvttsd_si64x (__m128d __A) +{ + return __builtin_ia32_cvttsd2si64 ((__v2df) __A); +} +#endif + static __inline __m128 _mm_cvtsd_ss (__m128 __A, __m128d __B) { @@ -1917,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B) return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B); } +#ifdef __x86_64__ +static __inline __m128d +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} +#endif + static __inline __m128d _mm_cvtss_sd (__m128d __A, __m128 __B) { @@ -2465,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A) return (__m128i) __builtin_ia32_loadd (&__A); } +#ifdef __x86_64__ +static __inline __m128i +_mm_cvtsi64x_si128 (long long __A) +{ + return (__m128i) __builtin_ia32_movq2dq (__A); +} +#endif + static __inline int _mm_cvtsi128_si32 (__m128i __A) { @@ -2473,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A) return __tmp; } +#ifdef __x86_64__ +static __inline long long +_mm_cvtsi128_si64x (__m128i __A) +{ + return __builtin_ia32_movdq2q ((__v2di)__A); +} +#endif + #endif /* __SSE2__ */ #endif /* __SSE__ 
*/ -- cgit v1.1