diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2007-12-13 19:19:38 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2007-12-13 19:19:38 +0100 |
commit | bfcd72332c49fce4b4368d89cbf0e2c7386665b5 (patch) | |
tree | 6539270280c421c0698010e006107f0cbbc6ab11 /gcc/config | |
parent | 17cf3985108a463cd143c310b566371f54008193 (diff) | |
download | gcc-bfcd72332c49fce4b4368d89cbf0e2c7386665b5.zip gcc-bfcd72332c49fce4b4368d89cbf0e2c7386665b5.tar.gz gcc-bfcd72332c49fce4b4368d89cbf0e2c7386665b5.tar.bz2 |
re PR target/34435 (SSE2 intrinsics - emmintrin with optimisations off and type conversion error)
PR target/34435
* config/i386/emmintrin.h (_mm_shuffle_pd, _mm_extract_epi16,
_mm_insert_epi16, _mm_shufflehi_epi16, _mm_shufflelo_epi16,
_mm_shuffle_epi32): Cast non-constant input values to either __m64,
__m128, __m128i or __m128d in a macro version of the intrinsic.
Cast constant input values to int.
* config/i386/ammintrin.h (_mm_extracti_si64, _mm_inserti_si64): Ditto.
* config/i386/bmmintrin.h (_mm_roti_epi8, _mm_roti_epi16,
_mm_roti_epi32, _mm_roti_epi64): Ditto.
* config/i386/smmintrin.h (_mm_blend_epi16, _mm_blend_ps, _mm_blend_pd,
_mm_dp_ps, _mm_dp_pd, _mm_insert_ps, _mm_extract_ps, _mm_insert_epi8,
_mm_insert_epi32, _mm_insert_epi64, _mm_extract_epi8, _mm_extract_epi32,
_mm_extract_epi64, _mm_mpsadbw_epu8, _mm_cmpistrm, _mm_cmpistri,
_mm_cmpestrm, _mm_cmpestri, _mm_cmpistra, _mm_cmpistrc, _mm_cmpistro,
_mm_cmpistrs, _mm_cmpistrz, _mm_cmpestra, _mm_cmpestrc, _mm_cmpestro,
_mm_cmpestrs, _mm_cmpestrz): Ditto.
* config/i386/tmmintrin.h (_mm_alignr_epi8, _mm_alignr_pi8): Ditto.
* config/i386/xmmintrin.h (_mm_shuffle_ps, _mm_extract_pi16, _m_pextrw,
_mm_insert_pi16, _m_pinsrw, _mm_shuffle_pi16, _m_pshufw): Ditto.
* config/i386/mmintrin-common.h (_mm_round_pd, _mm_round_sd,
_mm_round_ps, _mm_round_ss): Ditto.
testsuite/ChangeLog:
PR target/34435
* g++.dg/other/pr34435.C: New testcase.
From-SVN: r130904
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/ammintrin.h | 11 | ||||
-rw-r--r-- | gcc/config/i386/bmmintrin.h | 20 | ||||
-rw-r--r-- | gcc/config/i386/emmintrin.h | 18 | ||||
-rw-r--r-- | gcc/config/i386/mmintrin-common.h | 14 | ||||
-rw-r--r-- | gcc/config/i386/smmintrin.h | 159 | ||||
-rw-r--r-- | gcc/config/i386/tmmintrin.h | 19 | ||||
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 24 |
7 files changed, 156 insertions, 109 deletions
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h index c3f73b7..1351ebd 100644 --- a/gcc/config/i386/ammintrin.h +++ b/gcc/config/i386/ammintrin.h @@ -62,8 +62,9 @@ _mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L) return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L); } #else -#define _mm_extracti_si64(X, I, L) \ - ((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L)) +#define _mm_extracti_si64(X, I, L) \ + ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \ + (unsigned int)(I), (unsigned int)(L))) #endif static __inline __m128i __attribute__((__always_inline__, __artificial__)) @@ -79,8 +80,10 @@ _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned cons return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L); } #else -#define _mm_inserti_si64(X, Y, I, L) \ - ((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L)) +#define _mm_inserti_si64(X, Y, I, L) \ + ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \ + (__v2di)(__m128i)(Y), \ + (unsigned int)(I), (unsigned int)(L))) #endif #endif /* __SSE4A__ */ diff --git a/gcc/config/i386/bmmintrin.h b/gcc/config/i386/bmmintrin.h index cfd113d..48830f3 100644 --- a/gcc/config/i386/bmmintrin.h +++ b/gcc/config/i386/bmmintrin.h @@ -352,33 +352,37 @@ _mm_rot_epi64(__m128i __A, __m128i __B) /* Rotates - Immediate form */ #ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_roti_epi8(__m128i __A, int __B) +_mm_roti_epi8(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B); } static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_roti_epi16(__m128i __A, int __B) +_mm_roti_epi16(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B); } static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_roti_epi32(__m128i __A, int __B) 
+_mm_roti_epi32(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B); } static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_roti_epi64(__m128i __A, int __B) +_mm_roti_epi64(__m128i __A, const int __B) { return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B); } #else -#define _mm_roti_epi8(A, B) ((_m128i) __builtin_ia32_protbi ((__v16qi)(A), B) -#define _mm_roti_epi16(A, B) ((_m128i) __builtin_ia32_protwi ((__v8hi)(A), B) -#define _mm_roti_epi32(A, B) ((_m128i) __builtin_ia32_protdi ((__v4si)(A), B) -#define _mm_roti_epi64(A, B) ((_m128i) __builtin_ia32_protqi ((__v2di)(A), B) +#define _mm_roti_epi8(A, B) \ + ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(B))) +#define _mm_roti_epi16(A, B) \ + ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(B))) +#define _mm_roti_epi32(A, B) \ + ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(B))) +#define _mm_roti_epi64(A, B) \ + ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(B)) #endif /* pshl */ diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 0451ed7..1a0affc 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -887,8 +887,9 @@ _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); } #else -#define _mm_shuffle_pd(__A, __B, __C) \ - ((__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, (__C))) +#define _mm_shuffle_pd(__A, __B, __C) \ + ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)__A, \ + (__v2df)(__m128d)__B, (int)(__C))) #endif static __inline __m128d __attribute__((__always_inline__, __artificial__)) @@ -1320,9 +1321,10 @@ _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) } #else #define _mm_extract_epi16(A, N) \ - ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(A), (N))) -#define _mm_insert_epi16(A, D, N) \ - ((__m128i) __builtin_ia32_vec_set_v8hi 
((__v8hi)(A), (D), (N))) + ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_insert_epi16(A, D, N) \ + ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ + (int)(D), (int)(N))) #endif static __inline __m128i __attribute__((__always_inline__, __artificial__)) @@ -1381,11 +1383,11 @@ _mm_shuffle_epi32 (__m128i __A, const int __mask) } #else #define _mm_shufflehi_epi16(__A, __B) \ - ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B)) + ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)__A, (int)__B)) #define _mm_shufflelo_epi16(__A, __B) \ - ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B)) + ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)__A, (int)__B)) #define _mm_shuffle_epi32(__A, __B) \ - ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) + ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)__A, (int)__B)) #endif static __inline void __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/mmintrin-common.h b/gcc/config/i386/mmintrin-common.h index 4d0f751..3be8333 100644 --- a/gcc/config/i386/mmintrin-common.h +++ b/gcc/config/i386/mmintrin-common.h @@ -108,10 +108,11 @@ _mm_round_sd(__m128d __D, __m128d __V, const int __M) } #else #define _mm_round_pd(V, M) \ - ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M))) + ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M))) -#define _mm_round_sd(D, V, M) \ - ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M))) +#define _mm_round_sd(D, V, M) \ + ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \ + (__v2df)(__m128d)(V), (int)(M))) #endif /* Packed/scalar single precision floating point rounding. 
*/ @@ -132,10 +133,11 @@ _mm_round_ss (__m128 __D, __m128 __V, const int __M) } #else #define _mm_round_ps(V, M) \ - ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M))) + ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M))) -#define _mm_round_ss(D, V, M) \ - ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M))) +#define _mm_round_ss(D, V, M) \ + ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \ + (__v4sf)(__m128)(V), (int)(M))) #endif /* Macros for ceil/floor intrinsics. */ diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 693ebf4..3989773 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -53,8 +53,9 @@ _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M) __M); } #else -#define _mm_blend_epi16(X, Y, M) \ - ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M))) +#define _mm_blend_epi16(X, Y, M) \ + ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \ + (__v8hi)(__m128i)(Y), (int)(M))) #endif static __inline __m128i __attribute__((__always_inline__, __artificial__)) @@ -77,8 +78,9 @@ _mm_blend_ps (__m128 __X, __m128 __Y, const int __M) __M); } #else -#define _mm_blend_ps(X, Y, M) \ - ((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M))) +#define _mm_blend_ps(X, Y, M) \ + ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(M))) #endif static __inline __m128 __attribute__((__always_inline__, __artificial__)) @@ -101,8 +103,9 @@ _mm_blend_pd (__m128d __X, __m128d __Y, const int __M) __M); } #else -#define _mm_blend_pd(X, Y, M) \ - ((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M))) +#define _mm_blend_pd(X, Y, M) \ + ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(M))) #endif static __inline __m128d __attribute__((__always_inline__, __artificial__)) @@ -133,11 +136,13 @@ _mm_dp_pd (__m128d __X, __m128d __Y, const int __M) __M); } #else -#define 
_mm_dp_ps(X, Y, M) \ - ((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M))) +#define _mm_dp_ps(X, Y, M) \ + ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(M))) -#define _mm_dp_pd(X, Y, M) \ - ((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M))) +#define _mm_dp_pd(X, Y, M) \ + ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(M))) #endif /* Packed integer 64-bit comparison, zeroing or filling with ones @@ -228,8 +233,9 @@ _mm_insert_ps (__m128 __D, __m128 __S, const int __N) __N); } #else -#define _mm_insert_ps(D, S, N) \ - ((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N))) +#define _mm_insert_ps(D, S, N) \ + ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \ + (__v4sf)(__m128)(S), (int)(N))) #endif /* Helper macro to create the N value for _mm_insert_ps. */ @@ -247,14 +253,13 @@ _mm_extract_ps (__m128 __X, const int __N) return __tmp.i; } #else -#define _mm_extract_ps(X, N) \ - (__extension__ \ - ({ \ - union { int i; float f; } __tmp; \ - __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \ - __tmp.i; \ - }) \ - ) +#define _mm_extract_ps(X, N) \ + (__extension__ \ + ({ \ + union { int i; float f; } __tmp; \ + __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \ + __tmp.i; \ + })) #endif /* Extract binary representation of single precision float into @@ -296,15 +301,18 @@ _mm_insert_epi64 (__m128i __D, long long __S, const int __N) } #endif #else -#define _mm_insert_epi8(D, S, N) \ - ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N))) +#define _mm_insert_epi8(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \ + (int)(S), (int)(N))) -#define _mm_insert_epi32(D, S, N) \ - ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N))) +#define _mm_insert_epi32(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \ + (int)(S), (int)(N))) #ifdef __x86_64__ 
-#define _mm_insert_epi64(D, S, N) \ - ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N))) +#define _mm_insert_epi64(D, S, N) \ + ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \ + (long long)(S), (int)(N))) #endif #endif @@ -333,13 +341,13 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - __builtin_ia32_vec_ext_v16qi ((__v16qi) X, (N)) + __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)) #define _mm_extract_epi32(X, N) \ - __builtin_ia32_vec_ext_v4si ((__v4si) X, (N)) + __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) \ - ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N))) + ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N))) #endif #endif @@ -447,8 +455,9 @@ _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M) (__v16qi)__Y, __M); } #else -#define _mm_mpsadbw_epu8(X, Y, M) \ - ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M))) +#define _mm_mpsadbw_epu8(X, Y, M) \ + ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) #endif /* Load double quadword using non-temporal aligned hint. 
*/ @@ -521,17 +530,21 @@ _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) __M); } #else -#define _mm_cmpistrm(X, Y, M) \ - ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(X), (__v16qi)(Y), (M))) -#define _mm_cmpistri(X, Y, M) \ - __builtin_ia32_pcmpistri128 ((__v16qi)(X), (__v16qi)(Y), (M)) - -#define _mm_cmpestrm(X, LX, Y, LY, M) \ - ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M))) -#define _mm_cmpestri(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestri128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpistrm(X, Y, M) \ + ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) +#define _mm_cmpistri(X, Y, M) \ + ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) + +#define _mm_cmpestrm(X, LX, Y, LY, M) \ + ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \ + (int)(LX), (__v16qi)(__m128i)(Y), \ + (int)(LY), (int)(M))) +#define _mm_cmpestri(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \ + (__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M)) #endif /* Intrinsics for text/string processing and reading values of @@ -618,32 +631,42 @@ _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) __M); } #else -#define _mm_cmpistra(X, Y, M) \ - __builtin_ia32_pcmpistria128 ((__v16qi)(X), (__v16qi)(Y), (M)) -#define _mm_cmpistrc(X, Y, M) \ - __builtin_ia32_pcmpistric128 ((__v16qi)(X), (__v16qi)(Y), (M)) -#define _mm_cmpistro(X, Y, M) \ - __builtin_ia32_pcmpistrio128 ((__v16qi)(X), (__v16qi)(Y), (M)) -#define _mm_cmpistrs(X, Y, M) \ - __builtin_ia32_pcmpistris128 ((__v16qi)(X), (__v16qi)(Y), (M)) -#define _mm_cmpistrz(X, Y, M) \ - __builtin_ia32_pcmpistriz128 ((__v16qi)(X), (__v16qi)(Y), (M)) - -#define _mm_cmpestra(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestria128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), 
(int)(LY), (M)) -#define _mm_cmpestrc(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestric128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M)) -#define _mm_cmpestro(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestrio128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M)) -#define _mm_cmpestrs(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestris128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M)) -#define _mm_cmpestrz(X, LX, Y, LY, M) \ - __builtin_ia32_pcmpestriz128 ((__v16qi)(X), (int)(LX), \ - (__v16qi)(Y), (int)(LY), (M)) +#define _mm_cmpistra(X, Y, M) \ + ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) +#define _mm_cmpistrc(X, Y, M) \ + ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) +#define _mm_cmpistro(X, Y, M) \ + ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) +#define _mm_cmpistrs(X, Y, M) \ + ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) +#define _mm_cmpistrz(X, Y, M) \ + ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \ + (__v16qi)(__m128i)(Y), (int)(M))) + +#define _mm_cmpestra(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \ + (__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M))) +#define _mm_cmpestrc(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \ + (__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M))) +#define _mm_cmpestro(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \ + (__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M))) +#define _mm_cmpestrs(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \ + (__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M))) +#define _mm_cmpestrz(X, LX, Y, LY, M) \ + ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \ + 
(__v16qi)(__m128i)(Y), (int)(LY), \ + (int)(M))) #endif /* Packed integer 64-bit comparison, zeroing or filling with ones diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index 900bfbf..6b38913 100644 --- a/gcc/config/i386/tmmintrin.h +++ b/gcc/config/i386/tmmintrin.h @@ -185,18 +185,25 @@ _mm_sign_pi32 (__m64 __X, __m64 __Y) static __inline __m128i __attribute__((__always_inline__, __artificial__)) _mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N) { - return (__m128i)__builtin_ia32_palignr128 ((__v2di)__X, (__v2di)__Y, __N * 8);} + return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X, + (__v2di)__Y, __N * 8); +} static __inline __m64 __attribute__((__always_inline__, __artificial__)) _mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) { - return (__m64)__builtin_ia32_palignr ((long long)__X, (long long)__Y, __N * 8); + return (__m64) __builtin_ia32_palignr ((long long)__X, + (long long)__Y, __N * 8); } #else -#define _mm_alignr_epi8(__X, __Y, __N) \ - ((__m128i)__builtin_ia32_palignr128 ((__v2di) __X, (__v2di) __Y, (__N) * 8)) -#define _mm_alignr_pi8(__X, __Y, __N) \ - ((__m64)__builtin_ia32_palignr ((long long) (__X), (long long) (__Y), (__N) * 8)) +#define _mm_alignr_epi8(X, Y, N) \ + ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \ + (__v2di)(__m128i)(Y), \ + (int)(N) * 8)) +#define _mm_alignr_pi8(X, Y, N) \ + ((__m64) __builtin_ia32_palignr ((long long)(__m64)(__X), \ + (long long)(__m64)(__Y), \ + (int)(N) * 8)) #endif static __inline __m128i __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index c06b5ac..ab3aceb 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -723,8 +723,9 @@ _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); } #else -#define _mm_shuffle_ps(A, B, MASK) \ - ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), 
(MASK))) +#define _mm_shuffle_ps(A, B, MASK) \ + ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(MASK))) #endif /* Selects and interleaves the upper two SPFP values from A and B. */ @@ -1004,8 +1005,10 @@ _m_pextrw (__m64 const __A, int const __N) return _mm_extract_pi16 (__A, __N); } #else -#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N)) -#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) +#define _mm_extract_pi16(A, N) \ + ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N))) +#define _m_pextrw(A, N) \ + ((int) _mm_extract_pi16((__m64)(A),(int)(N))) #endif /* Inserts word D into one of four words of A. The selector N must be @@ -1023,9 +1026,11 @@ _m_pinsrw (__m64 const __A, int const __D, int const __N) return _mm_insert_pi16 (__A, __D, __N); } #else -#define _mm_insert_pi16(A, D, N) \ - ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N))) -#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \ + (int)(D), (int)(N))) +#define _m_pinsrw(A, D, N) \ + ((__m64) _mm_insert_pi16((__m64)(A), (int)(D), (int)(N)) #endif /* Compute the element-wise maximum of signed 16-bit values. */ @@ -1123,8 +1128,9 @@ _m_pshufw (__m64 __A, int const __N) } #else #define _mm_shuffle_pi16(A, N) \ - ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) -#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) + ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N))) +#define _m_pshufw(A, N) \ + ((__m64) _mm_shuffle_pi16 ((__m64)(A), (int)(N)) #endif /* Conditionally store byte elements of A into P. The high bit of each |