aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorkonglin1 <lingling.kong@intel.com>2021-12-07 17:08:23 +0800
committerliuhongt <hongtao.liu@intel.com>2021-12-23 17:32:51 +0800
commit61e53698a08dc1d9a54d785218af687a6751c1b3 (patch)
treed310cc9f07fe28888824b353d383bb72c7c39f43 /gcc/config
parent9ac0730c25b357b5fc75e18677cec27a546c1b64 (diff)
downloadgcc-61e53698a08dc1d9a54d785218af687a6751c1b3.zip
gcc-61e53698a08dc1d9a54d785218af687a6751c1b3.tar.gz
gcc-61e53698a08dc1d9a54d785218af687a6751c1b3.tar.bz2
i386: Enable intrinsics that convert float and bf16 data to each other.
gcc/ChangeLog: * config/i386/avx512bf16intrin.h (_mm_cvtsbh_ss): Add new intrinsic. (_mm512_cvtpbh_ps): Likewise. (_mm512_maskz_cvtpbh_ps): Likewise. (_mm512_mask_cvtpbh_ps): Likewise. * config/i386/avx512bf16vlintrin.h (_mm_cvtness_sbh): Likewise. (_mm_cvtpbh_ps): Likewise. (_mm256_cvtpbh_ps): Likewise. (_mm_maskz_cvtpbh_ps): Likewise. (_mm256_maskz_cvtpbh_ps): Likewise. (_mm_mask_cvtpbh_ps): Likewise. (_mm256_mask_cvtpbh_ps): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bf16-cvtsbh2ss-1.c: New test. * gcc.target/i386/avx512bf16-vcvtpbh2ps-1.c: Ditto. * gcc.target/i386/avx512bf16vl-cvtness2sbh-1.c: Ditto. * gcc.target/i386/avx512bf16vl-vcvtpbh2ps-1.c: Ditto.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/avx512bf16intrin.h36
-rw-r--r--gcc/config/i386/avx512bf16vlintrin.h63
2 files changed, 99 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 9afc6bd..6b62dc3 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -41,6 +41,16 @@ typedef short __v32bh __attribute__ ((__vector_size__ (64)));
vector types, and their scalar components. */
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+/* Convert One BF16 Data to One Single Float Data. */
+extern __inline float
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsbh_ss (__bfloat16 __A)
+{
+ union{ float a; unsigned int b;} __tmp;
+ __tmp.b = ((unsigned int)(__A)) << 16;
+ return __tmp.a;
+}
+
/* vcvtne2ps2bf16 */
extern __inline __m512bh
@@ -110,6 +120,32 @@ _mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpbh_ps (__m256bh __A)
+{
+ return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
+ (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
+{
+ return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
+ (__m512i)_mm512_maskz_cvtepi16_epi32 (
+ (__mmask16)__U, (__m256i)__A), 16));
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
+{
+ return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
+ (__m512i)__S, (__mmask16)__U,
+ (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
+}
+
#ifdef __DISABLE_AVX512BF16__
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h
index 6dd396d..5e6a650 100644
--- a/gcc/config/i386/avx512bf16vlintrin.h
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -43,6 +43,7 @@ typedef short __v8bh __attribute__ ((__vector_size__ (16)));
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef unsigned short __bfloat16;
/* vcvtne2ps2bf16 */
extern __inline __m256bh
@@ -175,6 +176,68 @@ _mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
}
+extern __inline __bfloat16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtness_sbh (float __A)
+{
+ __v4sf __V = {__A, 0, 0, 0};
+ __v8hi __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
+ (__v8hi)_mm_undefined_si128 (), (__mmask8)-1);
+ return __R[0];
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpbh_ps (__m128bh __A)
+{
+ return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
+ (__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpbh_ps (__m128bh __A)
+{
+ return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
+ (__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
+{
+ return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
+ (__m128i)_mm_maskz_cvtepi16_epi32 (
+ (__mmask8)__U, (__m128i)__A), 16));
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
+{
+ return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
+ (__m256i)_mm256_maskz_cvtepi16_epi32 (
+ (__mmask8)__U, (__m128i)__A), 16));
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
+{
+ return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
+ (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
+ (__m128i)__A), 16));
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
+{
+ return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
+ (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
+ (__m128i)__A), 16));
+}
+
#ifdef __DISABLE_AVX512BF16VL__
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options