[rs6000] PR89338, PR89339: Fix compat vector intrinsics for BE and 32-bit

Test FAILs: sse2-cvtpd2dq-1, sse2-cvtpd2ps, sse2-cvttpd2dq on powerpc64 (big-endian).
_mm_cvtpd_epi32, _mm_cvtpd_ps, _mm_cvttpd_epi32: Type conversion from the vector doubleword type to the vector word type leaves the results in even lanes in big-endian mode.

Test FAILs: sse-cvtss2si-1, sse-cvtss2si-2, sse-movmskb-1 on powerpc (32-bit big-endian).
Using an incorrect type to interpret the result of the mfvsrd instruction leads to incorrect results. Also, the mfvsrd instruction only works as expected in 64-bit mode, or for 32-bit quantities in 32-bit mode. A more general, if slower, solution is needed for 32-bit mode.

2019-02-25  Paul A. Clarke  <pc@us.ibm.com>

[gcc]

	* config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian.
	(_mm_cvtpd_ps): Likewise.
	(_mm_cvttpd_epi32): Likewise.

	PR target/89338
	* config/rs6000/xmmintrin.h (_mm_cvtss_si32): Fix type mismatch.
	(_mm_cvtss_si64): Fix type mismatch and 32-bit.

	PR target/89339
	* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit.

From-SVN: r269195
author: Paul A. Clarke <pc@us.ibm.com> 2019-02-25 19:36:05 +0000
committer: Paul Clarke <pc@gcc.gnu.org> 2019-02-25 19:36:05 +0000
commit: db739d3ca381fc1c149611677f7fbabbcca3318d (patch)
tree: 394b22eec4cf167bcc6ad14b9b82ad287dadf4c7 /gcc
parent: b5c44c57781011cf2b7977a96646ef9b87907a63 (diff)
download: gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.zip
gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.gz
gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.bz2
3 files changed, 38 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 997c330..6ed6890 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2019-02-25  Paul A. Clarke  <pc@us.ibm.com>
+
+[gcc]
+
+	* config/rs6000/emmintrin.h (_mm_cvtpd_epi32): Fix big endian.
+	(_mm_cvtpd_ps): Likewise.
+	(_mm_cvttpd_epi32): Likewise.
+
+	PR target/89338
+	* config/rs6000/xmmintrin.h (_mm_cvtss_si32): Fix type mismatch.
+	(_mm_cvtss_si64): Fix type mismatch and 32-bit.
+
+	PR target/89339
+	* config/rs6000/xmmintrin.h (_mm_movemask_pi8): Fix 32-bit.
+
+
 2019-02-25  Tamar Christina  <tamar.christina@arm.com>
 
 	PR target/88530
diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
index 832af99..3097509 100644
--- a/gcc/config/rs6000/emmintrin.h
+++ b/gcc/config/rs6000/emmintrin.h
@@ -887,7 +887,11 @@ _mm_cvtpd_epi32 (__m128d __A)
       : );
 
 #ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
   temp = vec_mergeo (temp, temp);
+#else
+  temp = vec_mergee (temp, temp);
+#endif
   result = (__v4si) vec_vpkudum ((__vector long long) temp,
 				 (__vector long long) vzero);
 #else
@@ -922,7 +926,11 @@ _mm_cvtpd_ps (__m128d __A)
       : );
 
 #ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
   temp = vec_mergeo (temp, temp);
+#else
+  temp = vec_mergee (temp, temp);
+#endif
   result = (__v4sf) vec_vpkudum ((__vector long long) temp,
 				 (__vector long long) vzero);
 #else
@@ -951,7 +959,11 @@ _mm_cvttpd_epi32 (__m128d __A)
       : );
 
 #ifdef _ARCH_PWR8
+#ifdef __LITTLE_ENDIAN__
   temp = vec_mergeo (temp, temp);
+#else
+  temp = vec_mergee (temp, temp);
+#endif
   result = (__v4si) vec_vpkudum ((__vector long long) temp,
 				 (__vector long long) vzero);
 #else
diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index 55159ef..71e4bd4 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -905,7 +905,7 @@ _mm_cvtss_f32 (__m128 __A)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_si32 (__m128 __A)
 {
-  __m64 res = 0;
+  int res;
 #ifdef _ARCH_PWR8
   double dtmp;
   __asm__(
@@ -938,8 +938,8 @@ _mm_cvt_ss2si (__m128 __A)
 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtss_si64 (__m128 __A)
 {
-  __m64 res = 0;
-#ifdef _ARCH_PWR8
+  long long res;
+#if defined (_ARCH_PWR8) && defined (__powerpc64__)
   double dtmp;
   __asm__(
 #ifdef __LITTLE_ENDIAN__
@@ -1577,6 +1577,7 @@ _m_pminub (__m64 __A, __m64 __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_pi8 (__m64 __A)
 {
+#ifdef __powerpc64__
   unsigned long long p =
 #ifdef __LITTLE_ENDIAN__
                          0x0008101820283038UL; // permute control for sign bits
@@ -1584,6 +1585,12 @@ _mm_movemask_pi8 (__m64 __A)
                          0x3830282018100800UL; // permute control for sign bits
 #endif
   return __builtin_bpermd (p, __A);
+#else
+  unsigned int mask = 0x20283038UL;
+  unsigned int r1 = __builtin_bpermd (mask, __A) & 0xf;
+  unsigned int r2 = __builtin_bpermd (mask, __A >> 32) & 0xf;
+  return (r2 << 4) | r1;
+#endif
 }
 
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
author	Paul A. Clarke <pc@us.ibm.com>	2019-02-25 19:36:05 +0000
committer	Paul Clarke <pc@gcc.gnu.org>	2019-02-25 19:36:05 +0000
commit	db739d3ca381fc1c149611677f7fbabbcca3318d (patch)
tree	394b22eec4cf167bcc6ad14b9b82ad287dadf4c7 /gcc
parent	b5c44c57781011cf2b7977a96646ef9b87907a63 (diff)
download	gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.zip gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.gz gcc-db739d3ca381fc1c149611677f7fbabbcca3318d.tar.bz2