author     Haochen Jiang <haochen.jiang@intel.com>    2023-01-10 09:35:08 +0800
committer  Haochen Jiang <haochen.jiang@intel.com>    2023-04-20 09:32:33 +0800
commit     e8571019066d9820c5cd4b3019b816203d438e83
tree       111ded8575466abc62e20cf0a4ec4d38e448e021
parent     4fb12ae93ddf6dea9a30041cecc94911d7863556
i386: Fix vpblendm{b,w} intrins and insns
For vpblendm{b,w}, the intrinsics actually take no constant parameters.
Therefore, there is no need for them to be wrapped in __OPTIMIZE__; a short usage sketch follows.
Also, we should check TARGET_AVX512VL for 128/256-bit vectors (see the second sketch after the ChangeLog).
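A minimal usage sketch, not part of the patch (the file name, flags, and values are illustrative): because the mask is an ordinary runtime operand rather than an immediate, the intrinsic has to be usable even at -O0, where an __OPTIMIZE__-guarded definition would not exist.

/* blend.c -- hypothetical example; compile with:
     gcc -O0 -mavx512bw -mavx512vl blend.c
   This builds at -O0 precisely because _mm_mask_blend_epi8 is a real
   inline function rather than an __OPTIMIZE__-only macro.  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128i a = _mm_set1_epi8 (1);
  __m128i w = _mm_set1_epi8 (2);
  __mmask16 m = 0x00FF;        /* runtime mask, no immediate required */
  /* Per byte: r[i] = ((m >> i) & 1) ? w[i] : a[i].  */
  __m128i r = _mm_mask_blend_epi8 (m, a, w);
  unsigned char out[16];
  _mm_storeu_si128 ((__m128i *) out, r);
  printf ("%d %d\n", out[0], out[15]);   /* prints "2 1" */
  return 0;
}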
gcc/ChangeLog:
* config/i386/avx512vlbwintrin.h
(_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper.
(_mm_mask_blend_epi8): Ditto.
(_mm256_mask_blend_epi16): Ditto.
(_mm256_mask_blend_epi8): Ditto.
* config/i386/avx512vlintrin.h
(_mm256_mask_blend_pd): Ditto.
(_mm256_mask_blend_ps): Ditto.
(_mm256_mask_blend_epi64): Ditto.
(_mm256_mask_blend_epi32): Ditto.
(_mm_mask_blend_pd): Ditto.
(_mm_mask_blend_ps): Ditto.
(_mm_mask_blend_epi64): Ditto.
(_mm_mask_blend_epi32): Ditto.
* config/i386/sse.md (VF_AVX512BWHFBF16): Removed.
(VF_AVX512HFBFVL): Move it before its first use.
(<avx512>_blendm<mode>): Change iterator from VF_AVX512BWHFBF16
to VF_AVX512HFBFVL.
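At the source level, the effect of the VL-guarded iterator is that the 128/256-bit HF/BF blendm patterns are only reachable when AVX512VL is enabled. A minimal sketch, assuming gcc -O2 -mavx512fp16 -mavx512vl (the function name is hypothetical):

/* Hypothetical example; requires -mavx512fp16 -mavx512vl.  Dropping
   -mavx512vl leaves only the 512-bit form available, which is exactly
   what the TARGET_AVX512VL conditions in VF_AVX512HFBFVL express.  */
#include <immintrin.h>

__m256h
blend_ph (__mmask16 m, __m256h a, __m256h b)
{
  /* Element-wise select: m[i] ? b[i] : a[i]; expected to expand to a
     vpblendmw-style masked blend via the <avx512>_blendm<mode> insn.  */
  return _mm256_mask_blend_ph (m, a, b);
}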
-rw-r--r--   gcc/config/i386/avx512vlbwintrin.h |  92
-rw-r--r--   gcc/config/i386/avx512vlintrin.h   | 184
-rw-r--r--   gcc/config/i386/sse.md             |  17
3 files changed, 115 insertions, 178 deletions
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 0232783..9d2aba2 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -259,6 +259,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+                                                    (__v8hi) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
+                                                    (__v16qi) __W,
+                                                    (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+                                                    (__v16hi) __W,
+                                                    (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+                                                    (__v32qi) __W,
+                                                    (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi16_epi8 (__m256i __A)
 {
@@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
                                               (__mmask8) __U);
 }
 
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
-                                                    (__v8hi) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
-                                                    (__v16qi) __W,
-                                                    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
-                                                    (__v16hi) __W,
-                                                    (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
-                                                    (__v32qi) __W,
-                                                    (__mmask32) __U);
-}
-
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
@@ -1986,26 +1986,6 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
                                        (__v8hi)(__m128i)_mm_setzero_si128(), \
                                        (__mmask8)(U)))
 
-#define _mm_mask_blend_epi16(__U, __A, __W)                      \
-  ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A),    \
-                                              (__v8hi) (__W),    \
-                                              (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi8(__U, __A, __W)                       \
-  ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A),   \
-                                              (__v16qi) (__W),   \
-                                              (__mmask16) (__U)))
-
-#define _mm256_mask_blend_epi16(__U, __A, __W)                   \
-  ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A),   \
-                                              (__v16hi) (__W),   \
-                                              (__mmask16) (__U)))
-
-#define _mm256_mask_blend_epi8(__U, __A, __W)                    \
-  ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A),   \
-                                              (__v32qi) (__W),   \
-                                              (__mmask32) (__U)))
-
 #define _mm_cmp_epi16_mask(X, Y, P)                              \
   ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
                                            (__v8hi)(__m128i)(Y), (int)(P),\
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 758b71a..4a717a7 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -935,6 +935,78 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
                                      (__mmask8) __U);
 }
 
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
+{
+  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
+                                                     (__v4df) __W,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
+{
+  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
+                                                    (__v8sf) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
+                                                    (__v4di) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
+                                                    (__v8si) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
+{
+  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
+                                                     (__v2df) __W,
+                                                     (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
+{
+  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
+                                                    (__v4sf) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
+                                                    (__v2di) __W,
+                                                    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
+                                                    (__v4si) __W,
+                                                    (__mmask8) __U);
+}
+
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
@@ -12262,78 +12334,6 @@ _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
                                                 (__mmask8) __U);
 }
 
-extern __inline __m256d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
-{
-  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
-                                                     (__v4df) __W,
-                                                     (__mmask8) __U);
-}
-
-extern __inline __m256
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
-{
-  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
-                                                    (__v8sf) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
-                                                    (__v4di) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
-{
-  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
-                                                    (__v8si) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m128d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
-{
-  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
-                                                     (__v2df) __W,
-                                                     (__mmask8) __U);
-}
-
-extern __inline __m128
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
-{
-  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
-                                                    (__v4sf) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
-                                                    (__v2di) __W,
-                                                    (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
-{
-  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
-                                                    (__v4si) __W,
-                                                    (__mmask8) __U);
-}
-
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
@@ -13717,46 +13717,6 @@ _mm256_permutex_pd (__m256d __X, const int __M)
                                      (__v4sf)(__m128)_mm_setzero_ps (), \
                                      (__mmask8)(U)))
 
-#define _mm256_mask_blend_pd(__U, __A, __W)                       \
-  ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A),    \
-                                               (__v4df) (__W),    \
-                                               (__mmask8) (__U)))
-
-#define _mm256_mask_blend_ps(__U, __A, __W)                       \
-  ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A),     \
-                                              (__v8sf) (__W),     \
-                                              (__mmask8) (__U)))
-
-#define _mm256_mask_blend_epi64(__U, __A, __W)                    \
-  ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A),     \
-                                              (__v4di) (__W),     \
-                                              (__mmask8) (__U)))
-
-#define _mm256_mask_blend_epi32(__U, __A, __W)                    \
-  ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A),     \
-                                              (__v8si) (__W),     \
-                                              (__mmask8) (__U)))
-
-#define _mm_mask_blend_pd(__U, __A, __W)                          \
-  ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A),    \
-                                               (__v2df) (__W),    \
-                                               (__mmask8) (__U)))
-
-#define _mm_mask_blend_ps(__U, __A, __W)                          \
-  ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A),     \
-                                              (__v4sf) (__W),     \
-                                              (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi64(__U, __A, __W)                       \
-  ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A),     \
-                                              (__v2di) (__W),     \
-                                              (__mmask8) (__U)))
-
-#define _mm_mask_blend_epi32(__U, __A, __W)                       \
-  ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A),     \
-                                              (__v4si) (__W),     \
-                                              (__mmask8) (__U)))
-
 #define _mm256_cmp_epu32_mask(X, Y, P)                            \
   ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
                                             (__v8si)(__m256i)(Y), (int)(P),\
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b5236ad..ddc9fd2 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -444,8 +444,9 @@
   [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
    (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF])
 
-(define_mode_iterator VF_AVX512BWHFBF16
-  [V32HF V16HF V8HF V32BF V16BF V8BF])
+(define_mode_iterator VF_AVX512HFBFVL
+  [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+   V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
 
 (define_mode_iterator VF_AVX512FP16VL
   [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
@@ -1585,10 +1586,10 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_blendm<mode>"
-  [(set (match_operand:VF_AVX512BWHFBF16 0 "register_operand" "=v,v")
-        (vec_merge:VF_AVX512BWHFBF16
-          (match_operand:VF_AVX512BWHFBF16 2 "nonimmediate_operand" "vm,vm")
-          (match_operand:VF_AVX512BWHFBF16 1 "nonimm_or_0_operand" "0C,v")
+  [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand" "=v,v")
+        (vec_merge:VF_AVX512HFBFVL
+          (match_operand:VF_AVX512HFBFVL 2 "nonimmediate_operand" "vm,vm")
+          (match_operand:VF_AVX512HFBFVL 1 "nonimm_or_0_operand" "0C,v")
           (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512BW"
   "@
@@ -4545,10 +4546,6 @@
   DONE;
 })
 
-(define_mode_iterator VF_AVX512HFBFVL
-  [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
-   V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-
 (define_expand "vcond<mode><sseintvecmodelower>"
   [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand")
         (if_then_else:VF_AVX512HFBFVL