aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authordianhong xu <dianhong.xu@intel.com>2021-06-15 17:00:35 +0800
committerliuhongt <hongtao.liu@intel.com>2021-09-22 18:27:28 +0800
commit6185b9a93cb78828db2b1a583aa2a2155c86e305 (patch)
tree5aee514a56f000ad94b683bc3d853d9be91a80ce /gcc
parent3ae5e6fbc89f45ef6874c3d37b75ae63311943a3 (diff)
downloadgcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.zip
gcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.tar.gz
gcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.tar.bz2
AVX512FP16: Support load/store/abs intrinsics.
gcc/ChangeLog: * config/i386/avx512fp16intrin.h (__m512h_u, __m256h_u, __m128h_u): New typedef. (_mm512_load_ph): New intrinsic. (_mm256_load_ph): Ditto. (_mm_load_ph): Ditto. (_mm512_loadu_ph): Ditto. (_mm256_loadu_ph): Ditto. (_mm_loadu_ph): Ditto. (_mm512_store_ph): Ditto. (_mm256_store_ph): Ditto. (_mm_store_ph): Ditto. (_mm512_storeu_ph): Ditto. (_mm256_storeu_ph): Ditto. (_mm_storeu_ph): Ditto. (_mm512_abs_ph): Ditto. * config/i386/avx512fp16vlintrin.h (_mm_abs_ph): Ditto. (_mm256_abs_ph): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-13.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/avx512fp16intrin.h100
-rw-r--r--gcc/config/i386/avx512fp16vlintrin.h16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-13.c140
3 files changed, 256 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index e01cff6..8f81bc42 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -45,6 +45,14 @@ typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
+/* Unaligned version of the same type. */
+typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \
+ __may_alias__, __aligned__ (1)));
+typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \
+ __may_alias__, __aligned__ (1)));
+typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
+ __may_alias__, __aligned__ (1)));
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5,
@@ -362,6 +370,48 @@ _mm_load_sh (void const *__P)
*(_Float16 const *) __P);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_load_ph (void const *__P)
+{
+ return *(const __m512h *) __P;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ph (void const *__P)
+{
+ return *(const __m256h *) __P;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ph (void const *__P)
+{
+ return *(const __m128h *) __P;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadu_ph (void const *__P)
+{
+ return *(const __m512h_u *) __P;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ph (void const *__P)
+{
+ return *(const __m256h_u *) __P;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ph (void const *__P)
+{
+ return *(const __m128h_u *) __P;
+}
+
/* Stores the lower _Float16 value. */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -370,6 +420,56 @@ _mm_store_sh (void *__P, __m128h __A)
*(_Float16 *) __P = ((__v8hf)__A)[0];
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_store_ph (void *__P, __m512h __A)
+{
+ *(__m512h *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ph (void *__P, __m256h __A)
+{
+ *(__m256h *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ph (void *__P, __m128h __A)
+{
+ *(__m128h *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_storeu_ph (void *__P, __m512h __A)
+{
+ *(__m512h_u *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ph (void *__P, __m256h __A)
+{
+ *(__m256h_u *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ph (void *__P, __m128h __A)
+{
+ *(__m128h_u *) __P = __A;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_ph (__m512h __A)
+{
+ return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF),
+ (__m512i) __A);
+}
+
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m512h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 9f71af2..7694bf8 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -425,6 +425,22 @@ _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
_mm256_setzero_ph (), __A);
}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_ph (__m128h __A)
+{
+ return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF),
+ (__m128i) __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_ph (__m256h __A)
+{
+ return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF),
+ (__m256i) __A);
+}
+
/* vcmpph */
#ifdef __OPTIMIZE
extern __inline __mmask8
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
new file mode 100644
index 0000000..c3bae65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c
@@ -0,0 +1,140 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <immintrin.h>
+void
+__attribute__ ((noinline, noclone))
+store512_ph (void *p, __m512h a)
+{
+ _mm512_store_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+void
+__attribute__ ((noinline, noclone))
+store256_ph (void *p, __m256h a)
+{
+ _mm256_store_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+void
+__attribute__ ((noinline, noclone))
+store_ph (void *p, __m128h a)
+{
+ _mm_store_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+__m512h
+__attribute__ ((noinline, noclone))
+load512_ph (void const *p)
+{
+ return _mm512_load_ph (p);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+__m256h
+__attribute__ ((noinline, noclone))
+load256_ph (void const *p)
+{
+ return _mm256_load_ph (p);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+__m128h
+__attribute__ ((noinline, noclone))
+load_ph (void const *p)
+{
+ return _mm_load_ph (p);
+}
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)" 1 } } */
+
+__m512h
+__attribute__ ((noinline, noclone))
+load512u_ph (void const *p)
+{
+ return _mm512_loadu_ph (p);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%zmm\[0-9\]" 1 } } */
+
+__m256h
+__attribute__ ((noinline, noclone))
+load256u_ph (void const *p)
+{
+ return _mm256_loadu_ph (p);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */
+
+__m128h
+__attribute__ ((noinline, noclone))
+load128u_ph (void const *p)
+{
+ return _mm_loadu_ph (p);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+
+void
+__attribute__ ((noinline, noclone))
+store512u_ph (void *p, __m512h a)
+{
+ return _mm512_storeu_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */
+
+void
+__attribute__ ((noinline, noclone))
+store256u_ph (void *p, __m256h a)
+{
+ return _mm256_storeu_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */
+
+void
+__attribute__ ((noinline, noclone))
+storeu_ph (void *p, __m128h a)
+{
+ return _mm_storeu_ph (p, a);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */
+
+__m512h
+__attribute__ ((noinline, noclone))
+abs512_ph (__m512h a)
+{
+ return _mm512_abs_ph (a);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpandd\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+__m256h
+__attribute__ ((noinline, noclone))
+abs256_ph (__m256h a)
+{
+ return _mm256_abs_ph (a);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */
+
+__m128h
+__attribute__ ((noinline, noclone))
+abs_ph (__m128h a)
+{
+ return _mm_abs_ph (a);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */