diff options
author | Paul A. Clarke <pc@us.ibm.com> | 2021-08-09 13:08:25 -0500 |
---|---|---|
committer | Paul A. Clarke <pc@us.ibm.com> | 2021-10-19 10:36:59 -0500 |
commit | 3cfbe5dc08b574bccc398256946cc03e2a767329 (patch) | |
tree | 25a448674949dd45ac06840a415b6f9a404727ed | |
parent | ce8add4b0e086e671a7e08503408356ad6beee7f (diff) | |
download | gcc-3cfbe5dc08b574bccc398256946cc03e2a767329.zip gcc-3cfbe5dc08b574bccc398256946cc03e2a767329.tar.gz gcc-3cfbe5dc08b574bccc398256946cc03e2a767329.tar.bz2 |
rs6000: Guard some x86 intrinsics implementations
Some compatibility implementations of x86 intrinsics include
Power intrinsics which require POWER8. Guard them.
emmintrin.h:
- _mm_cmpord_pd: Remove code which was ostensibly for pre-POWER8,
but which in fact depended on POWER8 (vec_cmpgt(v2du)/vcmpgtud).
The "POWER8" version works fine on pre-POWER8.
- _mm_mul_epu32: vec_mule(v4su) uses vmuleuw.
pmmintrin.h:
- _mm_movehdup_ps: vec_mergeo(v4su) uses vmrgow.
- _mm_moveldup_ps: vec_mergee(v4su) uses vmrgew.
smmintrin.h:
- _mm_cmpeq_epi64: vec_cmpeq(v2di) uses vcmpequd.
- _mm_mul_epi32: vec_mule(v4si) uses vmulesw.
- _mm_cmpgt_epi64: vec_cmpgt(v2di) uses vcmpgtsd.
tmmintrin.h:
- _mm_sign_epi8: vec_neg(v16qi) uses vsububm.
- _mm_sign_epi16: vec_neg(v8hi) uses vsubuhm.
- _mm_sign_epi32: vec_neg(v4si) uses vsubuwm.
Note that the above three could actually be supported pre-POWER8,
but current GCC does not support them before POWER8.
- _mm_sign_pi8: depends on _mm_sign_epi8.
- _mm_sign_pi16: depends on _mm_sign_epi16.
- _mm_sign_pi32: depends on _mm_sign_epi32.
sse4_2-pcmpgtq.c:
- _mm_cmpgt_epi64: vec_cmpgt(v2di) uses vcmpgtsd.
2021-10-19 Paul A. Clarke <pc@us.ibm.com>
gcc
PR target/101893
PR target/102719
* config/rs6000/emmintrin.h: Guard POWER8 intrinsics.
* config/rs6000/pmmintrin.h: Same.
* config/rs6000/smmintrin.h: Same.
* config/rs6000/tmmintrin.h: Same.
gcc/testsuite
* gcc.target/powerpc/sse4_2-pcmpgtq.c: Tighten dg constraints
to minimally Power8.
-rw-r--r-- | gcc/config/rs6000/emmintrin.h | 12 | ||||
-rw-r--r-- | gcc/config/rs6000/pmmintrin.h | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/smmintrin.h | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/tmmintrin.h | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c | 4 |
5 files changed, 24 insertions, 12 deletions
diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h index ce1287e..32ad72b 100644 --- a/gcc/config/rs6000/emmintrin.h +++ b/gcc/config/rs6000/emmintrin.h @@ -430,20 +430,10 @@ _mm_cmpnge_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_pd (__m128d __A, __m128d __B) { -#if _ARCH_PWR8 __v2du c, d; /* Compare against self will return false (0's) if NAN. */ c = (__v2du)vec_cmpeq (__A, __A); d = (__v2du)vec_cmpeq (__B, __B); -#else - __v2du a, b; - __v2du c, d; - const __v2du double_exp_mask = {0x7ff0000000000000, 0x7ff0000000000000}; - a = (__v2du)vec_abs ((__v2df)__A); - b = (__v2du)vec_abs ((__v2df)__B); - c = (__v2du)vec_cmpgt (double_exp_mask, a); - d = (__v2du)vec_cmpgt (double_exp_mask, b); -#endif /* A != NAN and B != NAN. */ return ((__m128d)vec_and(c, d)); } @@ -1472,6 +1462,7 @@ _mm_mul_su32 (__m64 __A, __m64 __B) return ((__m64)a * (__m64)b); } +#ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_epu32 (__m128i __A, __m128i __B) { @@ -1498,6 +1489,7 @@ _mm_mul_epu32 (__m128i __A, __m128i __B) return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B); #endif } +#endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi16 (__m128i __A, int __B) diff --git a/gcc/config/rs6000/pmmintrin.h b/gcc/config/rs6000/pmmintrin.h index eab712f..83dff1d 100644 --- a/gcc/config/rs6000/pmmintrin.h +++ b/gcc/config/rs6000/pmmintrin.h @@ -123,17 +123,21 @@ _mm_hsub_pd (__m128d __X, __m128d __Y) vec_mergel ((__v2df) __X, (__v2df)__Y)); } +#ifdef _ARCH_PWR8 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movehdup_ps (__m128 __X) { return (__m128)vec_mergeo ((__v4su)__X, (__v4su)__X); } +#endif +#ifdef _ARCH_PWR8 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_moveldup_ps 
(__m128 __X) { return (__m128)vec_mergee ((__v4su)__X, (__v4su)__X); } +#endif extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loaddup_pd (double const *__P) diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 90ce03d..b732fbc 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -96,6 +96,7 @@ _mm_extract_ps (__m128 __X, const int __N) return ((__v4si)__X)[__N & 3]; } +#ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8) { @@ -107,6 +108,7 @@ _mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8) #endif return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask); } +#endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask) @@ -167,6 +169,7 @@ _mm_blend_pd (__m128d __A, __m128d __B, const int __imm8) return (__m128d) __r; } +#ifdef _ARCH_PWR8 __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask) @@ -175,6 +178,7 @@ _mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask) const __vector __bool long long __boolmask = vec_cmplt ((__v2di) __mask, __zero); return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask); } +#endif __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/rs6000/tmmintrin.h b/gcc/config/rs6000/tmmintrin.h index 9715112..a67d88c 100644 --- a/gcc/config/rs6000/tmmintrin.h +++ b/gcc/config/rs6000/tmmintrin.h @@ -350,6 +350,7 @@ _mm_shuffle_pi8 (__m64 __A, __m64 __B) return (__m64) ((__v2du) (__C))[0]; } +#ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi8 (__m128i __A, __m128i __B) @@ -361,7 +362,9 @@ 
_mm_sign_epi8 (__m128i __A, __m128i __B) __v16qi __conv = vec_add (__selectneg, __selectpos); return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv); } +#endif +#ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi16 (__m128i __A, __m128i __B) @@ -373,7 +376,9 @@ _mm_sign_epi16 (__m128i __A, __m128i __B) __v8hi __conv = vec_add (__selectneg, __selectpos); return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv); } +#endif +#ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_epi32 (__m128i __A, __m128i __B) @@ -385,7 +390,9 @@ _mm_sign_epi32 (__m128i __A, __m128i __B) __v4si __conv = vec_add (__selectneg, __selectpos); return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv); } +#endif +#ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi8 (__m64 __A, __m64 __B) @@ -396,7 +403,9 @@ _mm_sign_pi8 (__m64 __A, __m64 __B) __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D); return (__m64) ((__v2du) (__C))[0]; } +#endif +#ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi16 (__m64 __A, __m64 __B) @@ -407,7 +416,9 @@ _mm_sign_pi16 (__m64 __A, __m64 __B) __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D); return (__m64) ((__v2du) (__C))[0]; } +#endif +#ifdef _ARCH_PWR8 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sign_pi32 (__m64 __A, __m64 __B) @@ -418,6 +429,7 @@ _mm_sign_pi32 (__m64 __A, __m64 __B) __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D); return (__m64) ((__v2du) (__C))[0]; } +#endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c index e8ecd9c..36b9bd7 100644 --- 
a/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c +++ b/gcc/testsuite/gcc.target/powerpc/sse4_2-pcmpgtq.c @@ -1,6 +1,6 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mvsx" } */ -/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mpower8-vector" } */ +/* { dg-require-effective-target p8vector_hw } */ #ifndef CHECK_H #define CHECK_H "sse4_2-check.h" |