diff options
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Headers/avx2intrin.h | 36 | ||||
| -rw-r--r-- | clang/lib/Headers/smmintrin.h | 18 |
2 files changed, 36 insertions, 18 deletions
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 9263392..e025d6f 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -360,73 +360,85 @@ _mm256_movemask_epi8(__m256i __a) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi16(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi32(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); + /* This function always performs a signed extension, but __v16qi is a char + which may be signed or unsigned, so use __v16qs. */ + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi32(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); + return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); + return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi16(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi32(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi32(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); + typedef unsigned short __v8hu __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); + typedef unsigned short __v8hu __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu32_epi64(__m128i __V) { - return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); + typedef unsigned int __v4su __attribute__((__vector_size__(16))); + return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 974fe6f..a45a24c 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -324,37 +324,43 @@ _mm_cvtepi32_epi64(__m128i __V) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); + typedef unsigned char __v16qu __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); + typedef unsigned short __v8hu __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); + typedef unsigned short __v8hu __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) { - return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); + typedef unsigned int __v4su __attribute__((__vector_size__(16))); + return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); } /* SSE4 Pack with Unsigned Saturation. */ |
