diff options
author | Paul A. Clarke <pc@us.ibm.com> | 2019-02-25 19:36:05 +0000 |
---|---|---|
committer | Paul Clarke <pc@gcc.gnu.org> | 2019-02-25 19:36:05 +0000 |
commit | db739d3ca381fc1c149611677f7fbabbcca3318d (patch) | |
tree | 394b22eec4cf167bcc6ad14b9b82ad287dadf4c7 /gcc | |
parent | b5c44c57781011cf2b7977a96646ef9b87907a63 (diff) | |
download | gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.zip gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.gz gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.bz2 |
[rs6000] PR89338, PR89339: Fix compat vector intrinsics for BE and 32-bit
Test FAILS: sse2-cvtpd2dq-1, sse2-cvtpd2ps, sse2-cvttpd2dq on powerpc64
(big-endian).
_mm_cvtpd_epi32, _mm_cvtpd_ps, _mm_cvttpd_epi32: Type conversion from
vector doubleword type to vector word type leaves the results in even
lanes in big endian mode, so vec_mergee (not vec_mergeo) is needed there.
Test FAILS: sse-cvtss2si-1, sse-cvtss2si-2, sse-movmskb-1 on powerpc
(32-bit big-endian).
An incorrect type for interpreting the result of the mfvsrd instruction
leads to incorrect results. Also, the mfvsrd instruction only works as
expected in 64-bit mode, or for 32-bit quantities in 32-bit mode. A more
general, if slower, solution is needed for 32-bit mode.
2019-02-25 Paul A. Clarke <pc@us.ibm.com>
[gcc]
* config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian.
(_mm_cvtpd_ps): Likewise.
(_mm_cvttpd_epi32): Likewise.
PR target/89338
* config/rs6000/xmmintrin.h (_mm_cvtss_f32): Fix type mismatch.
(_mm_cvt_ss2si): Fix type mismatch and 32-bit.
PR target/89339
* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit.
From-SVN: r269195
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/config/rs6000/emmintrin.h | 12 | ||||
-rw-r--r-- | gcc/config/rs6000/xmmintrin.h | 13 |
3 files changed, 38 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 997c330..6ed6890 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2019-02-25 Paul A. Clarke <pc@us.ibm.com> + +[gcc] + + * config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian. + (_mm_cvtpd_ps): Likewise. + (_mm_cvttpd_epi32): Likewise. + + PR target/89338 + * config/rs6000/xmmintrin.h (_mm_cvtss_f32): Fix type mismatch. + (_mm_cvt_ss2si): Fix type mismatch and 32-bit. + + PR target/89339 + * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit. + + 2019-02-25 Tamar Christina <tamar.christina@arm.com> PR target/88530 diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h index 832af99..3097509 100644 --- a/gcc/config/rs6000/emmintrin.h +++ b/gcc/config/rs6000/emmintrin.h @@ -887,7 +887,11 @@ _mm_cvtpd_epi32 (__m128d __A) : ); #ifdef _ARCH_PWR8 +#ifdef __LITTLE_ENDIAN__ temp = vec_mergeo (temp, temp); +#else + temp = vec_mergee (temp, temp); +#endif result = (__v4si) vec_vpkudum ((__vector long long) temp, (__vector long long) vzero); #else @@ -922,7 +926,11 @@ _mm_cvtpd_ps (__m128d __A) : ); #ifdef _ARCH_PWR8 +#ifdef __LITTLE_ENDIAN__ temp = vec_mergeo (temp, temp); +#else + temp = vec_mergee (temp, temp); +#endif result = (__v4sf) vec_vpkudum ((__vector long long) temp, (__vector long long) vzero); #else @@ -951,7 +959,11 @@ _mm_cvttpd_epi32 (__m128d __A) : ); #ifdef _ARCH_PWR8 +#ifdef __LITTLE_ENDIAN__ temp = vec_mergeo (temp, temp); +#else + temp = vec_mergee (temp, temp); +#endif result = (__v4si) vec_vpkudum ((__vector long long) temp, (__vector long long) vzero); #else diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h index 55159ef..71e4bd4 100644 --- a/gcc/config/rs6000/xmmintrin.h +++ b/gcc/config/rs6000/xmmintrin.h @@ -905,7 +905,7 @@ _mm_cvtss_f32 (__m128 __A) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si32 (__m128 __A) { - __m64 res = 0; + int res; #ifdef _ARCH_PWR8 double dtmp; 
__asm__( @@ -938,8 +938,8 @@ _mm_cvt_ss2si (__m128 __A) extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si64 (__m128 __A) { - __m64 res = 0; -#ifdef _ARCH_PWR8 + long long res; +#if defined (_ARCH_PWR8) && defined (__powerpc64__) double dtmp; __asm__( #ifdef __LITTLE_ENDIAN__ @@ -1577,6 +1577,7 @@ _m_pminub (__m64 __A, __m64 __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_pi8 (__m64 __A) { +#ifdef __powerpc64__ unsigned long long p = #ifdef __LITTLE_ENDIAN__ 0x0008101820283038UL; // permute control for sign bits @@ -1584,6 +1585,12 @@ _mm_movemask_pi8 (__m64 __A) 0x3830282018100800UL; // permute control for sign bits #endif return __builtin_bpermd (p, __A); +#else + unsigned int mask = 0x20283038UL; + unsigned int r1 = __builtin_bpermd (mask, __A) & 0xf; + unsigned int r2 = __builtin_bpermd (mask, __A >> 32) & 0xf; + return (r2 << 4) | r1; +#endif } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |