diff options
author | Paul A. Clarke <pc@us.ibm.com> | 2021-10-21 11:21:01 -0500 |
---|---|---|
committer | Paul A. Clarke <pc@us.ibm.com> | 2021-11-29 09:50:43 -0600 |
commit | 85289ba36c2e62de84cc0232c954d9a74bda708a (patch) | |
tree | bb3219e79688decd1413717a3e9b5e88b7246cf4 /gcc | |
parent | e2194a8b39251497d770abf3fb6ee06de6072ed9 (diff) | |
download | gcc-85289ba36c2e62de84cc0232c954d9a74bda708a.zip gcc-85289ba36c2e62de84cc0232c954d9a74bda708a.tar.gz gcc-85289ba36c2e62de84cc0232c954d9a74bda708a.tar.bz2 |
rs6000: Add Power10 optimization for most _mm_movemask*
The Power10 ISA added `vextract*` instructions, which are exposed through the
`vec_extractm` intrinsic.
Use `vec_extractm` for the `_mm_movemask_ps`, `_mm_movemask_pd`, and
`_mm_movemask_epi8` compatibility intrinsics when `_ARCH_PWR10` is defined.
2021-11-29 Paul A. Clarke <pc@us.ibm.com>
gcc
* config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
when _ARCH_PWR10.
* config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
(_mm_movemask_epi8): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/rs6000/emmintrin.h | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/xmmintrin.h | 4 |
2 files changed, 12 insertions, 0 deletions
diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h index 4125b12..c4758be 100644 --- a/gcc/config/rs6000/emmintrin.h +++ b/gcc/config/rs6000/emmintrin.h @@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_pd (__m128d __A) { +#ifdef _ARCH_PWR10 + return vec_extractm ((__v2du) __A); +#else __vector unsigned long long result; static const __vector unsigned int perm_mask = { @@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d __A) #else return result[0]; #endif +#endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ @@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_epi8 (__m128i __A) { +#ifdef _ARCH_PWR10 + return vec_extractm ((__v16qu) __A); +#else __vector unsigned long long result; static const __vector unsigned char perm_mask = { @@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A) #else return result[0]; #endif +#endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h index ae1a33e..4c093fd 100644 --- a/gcc/config/rs6000/xmmintrin.h +++ b/gcc/config/rs6000/xmmintrin.h @@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movemask_ps (__m128 __A) { +#ifdef _ARCH_PWR10 + return vec_extractm ((vector unsigned int) __A); +#else __vector unsigned long long result; static const __vector unsigned int perm_mask = { @@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128 __A) #else return result[0]; #endif +#endif /* !_ARCH_PWR10 */ } #endif /* _ARCH_PWR8 */ |