| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-28 08:12:45 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-28 08:12:45 +0000 |
| commit | 91b77ceaed0401cbfae82a0628ff46a501097a5b (patch) | |
| tree | 8d2912868e945e9fc827774aca5bada73deed96d /clang/lib | |
| parent | 869631f9871d19cd36903ead39e4960f4b003116 (diff) | |
[X86][SSE] Replace VPMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (clang)
The VPMOVSX and (V)PMOVZX sign/zero extension intrinsics can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics.
This patch removes the clang builtins and their use in the SSE4.1/AVX2 headers (smmintrin.h, avx2intrin.h); a companion patch will remove/auto-upgrade the llvm intrinsics.
Note: We already did this for the SSE4.1 PMOVSX intrinsics some time ago.
Differential Revision: http://reviews.llvm.org/D20684
llvm-svn: 271106
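The replacement pattern is mechanical: cast the source to a vector whose element type carries the intended signedness, pull out the low lanes with __builtin_shufflevector when the destination has fewer lanes than the 128-bit source, and widen with __builtin_convertvector. Below is a minimal standalone sketch of that pattern; the typedefs and function names are illustrative only, not the ones used in the headers.

```c
/* Sketch of the extension pattern used by the reworked intrinsics.
   The names below are illustrative; the real headers use __v16qs,
   __v16qu, __v8si, etc.  Requires clang's vector-extension builtins. */
typedef char          v16i8 __attribute__((__vector_size__(16)));
typedef signed char   v16s8 __attribute__((__vector_size__(16)));
typedef unsigned char v16u8 __attribute__((__vector_size__(16)));
typedef int           v4i32 __attribute__((__vector_size__(16)));

/* Sign-extend the low 4 bytes to 4 x i32 (the _mm_cvtepi8_epi32 shape).
   Cast to signed char first: plain 'char' may be unsigned on some targets,
   and __builtin_convertvector extends according to the element type. */
static v4i32 sext_lo4(v16i8 v) {
  v16s8 s = (v16s8)v;
  return __builtin_convertvector(__builtin_shufflevector(s, s, 0, 1, 2, 3),
                                 v4i32);
}

/* Zero-extend the low 4 bytes to 4 x i32 (the _mm_cvtepu8_epi32 shape). */
static v4i32 zext_lo4(v16i8 v) {
  v16u8 u = (v16u8)v;
  return __builtin_convertvector(__builtin_shufflevector(u, u, 0, 1, 2, 3),
                                 v4i32);
}
```

With optimization enabled, the x86 backend is expected to recognize this extract-then-extend idiom and emit the corresponding PMOVSX/PMOVZX instruction, which is what makes dropping the dedicated builtins safe.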
Diffstat (limited to 'clang/lib')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | clang/lib/Headers/avx2intrin.h | 36 |
| -rw-r--r-- | clang/lib/Headers/smmintrin.h | 18 |
2 files changed, 36 insertions, 18 deletions
```diff
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 9263392..e025d6f 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -360,73 +360,85 @@ _mm256_movemask_epi8(__m256i __a)
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi16(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi8_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V);
+  /* This function always performs a signed extension, but __v16qi is a char
+     which may be signed or unsigned, so use __v16qs. */
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V);
+  return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi16_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V);
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepi32_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V);
+  return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi16(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu8_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi32(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu16_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_cvtepu32_epi64(__m128i __V)
 {
-  return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
+  typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+  return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 974fe6f..a45a24c 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -324,37 +324,43 @@ _mm_cvtepi32_epi64(__m128i __V)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi16(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi32(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu8_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
+  typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi32(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu16_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
+  typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_cvtepu32_epi64(__m128i __V)
 {
-  return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
+  typedef unsigned int __v4su __attribute__((__vector_size__(16)));
+  return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);
 }
 
 /* SSE4 Pack with Unsigned Saturation. */
```
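The distinction the comments call out between plain __v16qi (a vector of char, whose signedness is target-dependent) and the explicitly signed/unsigned __v16qs/__v16qu is observable from user code: on a byte with the top bit set, the signed and unsigned widenings give different results. A small hypothetical check (not part of the patch) exercising one of the intrinsics touched here, _mm_cvtepu8_epi32, against its signed counterpart:

```c
/* Hypothetical check of the reworked SSE4.1 extensions; not part of the patch.
   Build with: clang -O2 -msse4.1 ext_test.c */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i v = _mm_set1_epi8((char)0x80);   /* every byte is 0x80 */
  __m128i s = _mm_cvtepi8_epi32(v);        /* signed extension  */
  __m128i z = _mm_cvtepu8_epi32(v);        /* zero extension    */
  printf("sext: %d  zext: %d\n",
         _mm_extract_epi32(s, 0), _mm_extract_epi32(z, 0));
  /* Prints "sext: -128  zext: 128" regardless of whether plain char
     is signed or unsigned on the target. */
  return 0;
}
```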
