diff options
author | dianhong xu <dianhong.xu@intel.com> | 2021-06-15 17:00:35 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-09-22 18:27:28 +0800 |
commit | 6185b9a93cb78828db2b1a583aa2a2155c86e305 (patch) | |
tree | 5aee514a56f000ad94b683bc3d853d9be91a80ce /gcc | |
parent | 3ae5e6fbc89f45ef6874c3d37b75ae63311943a3 (diff) | |
download | gcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.zip gcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.tar.gz gcc-6185b9a93cb78828db2b1a583aa2a2155c86e305.tar.bz2 |
AVX512FP16: Support load/store/abs intrinsics.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h (__m512h_u, __m256h_u,
__m128h_u): New typedef.
(_mm512_load_ph): New intrinsic.
(_mm256_load_ph): Ditto.
(_mm_load_ph): Ditto.
(_mm512_loadu_ph): Ditto.
(_mm256_loadu_ph): Ditto.
(_mm_loadu_ph): Ditto.
(_mm512_store_ph): Ditto.
(_mm256_store_ph): Ditto.
(_mm_store_ph): Ditto.
(_mm512_storeu_ph): Ditto.
(_mm256_storeu_ph): Ditto.
(_mm_storeu_ph): Ditto.
(_mm512_abs_ph): Ditto.
* config/i386/avx512fp16vlintrin.h
(_mm_abs_ph): Ditto.
(_mm256_abs_ph): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-13.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/avx512fp16intrin.h | 100 | ||||
-rw-r--r-- | gcc/config/i386/avx512fp16vlintrin.h | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-13.c | 140 |
3 files changed, 256 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index e01cff6..8f81bc42 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -45,6 +45,14 @@ typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); typedef _Float16 __m256h __attribute__ ((__vector_size__ (32), __may_alias__)); typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); +/* Unaligned version of the same type. */ +typedef _Float16 __m128h_u __attribute__ ((__vector_size__ (16), \ + __may_alias__, __aligned__ (1))); +typedef _Float16 __m256h_u __attribute__ ((__vector_size__ (32), \ + __may_alias__, __aligned__ (1))); +typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_ph (_Float16 __A7, _Float16 __A6, _Float16 __A5, @@ -362,6 +370,48 @@ _mm_load_sh (void const *__P) *(_Float16 const *) __P); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_load_ph (void const *__P) +{ + return *(const __m512h *) __P; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_load_ph (void const *__P) +{ + return *(const __m256h *) __P; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ph (void const *__P) +{ + return *(const __m128h *) __P; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_ph (void const *__P) +{ + return *(const __m512h_u *) __P; +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadu_ph (void const *__P) +{ + return *(const __m256h_u *) __P; +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_ph (void const *__P) +{ + return *(const __m128h_u *) __P; +} + /* Stores the lower _Float16 value. */ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -370,6 +420,56 @@ _mm_store_sh (void *__P, __m128h __A) *(_Float16 *) __P = ((__v8hf)__A)[0]; } +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_store_ph (void *__P, __m512h __A) +{ + *(__m512h *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_store_ph (void *__P, __m256h __A) +{ + *(__m256h *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ph (void *__P, __m128h __A) +{ + *(__m128h *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_ph (void *__P, __m512h __A) +{ + *(__m512h_u *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_ph (void *__P, __m256h __A) +{ + *(__m256h_u *) __P = __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_ph (void *__P, __m128h __A) +{ + *(__m128h_u *) __P = __A; +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_abs_ph (__m512h __A) +{ + return (__m512h) _mm512_and_epi32 ( _mm512_set1_epi32 (0x7FFF7FFF), + (__m512i) __A); +} + /* Intrinsics v[add,sub,mul,div]ph. */ extern __inline __m512h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 9f71af2..7694bf8 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -425,6 +425,22 @@ _mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C) _mm256_setzero_ph (), __A); } +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_abs_ph (__m128h __A) +{ + return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF), + (__m128i) __A); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_abs_ph (__m256h __A) +{ + return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF), + (__m256i) __A); +} + /* vcmpph */ #ifdef __OPTIMIZE extern __inline __mmask8 diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c new file mode 100644 index 0000000..c3bae65 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c @@ -0,0 +1,140 @@ +/* { dg-do compile} */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ + +#include <immintrin.h> +void +__attribute__ ((noinline, noclone)) +store512_ph (void *p, __m512h a) +{ + _mm512_store_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +void +__attribute__ ((noinline, noclone)) +store256_ph (void *p, __m256h a) +{ + _mm256_store_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +void +__attribute__ ((noinline, noclone)) +store_ph (void *p, __m128h a) +{ + _mm_store_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +load512_ph (void const *p) +{ + return _mm512_load_ph (p); +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +load256_ph (void const *p) +{ + return _mm256_load_ph (p); +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +load_ph (void const *p) +{ + return _mm_load_ph (p); +} +/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)" 1 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +load512u_ph (void const *p) +{ + return _mm512_loadu_ph (p); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%zmm\[0-9\]" 1 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +load256u_ph (void const *p) +{ + return _mm256_loadu_ph (p); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +load128u_ph (void const *p) +{ + return _mm_loadu_ph (p); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */ + +void +__attribute__ ((noinline, noclone)) +store512u_ph (void *p, __m512h a) +{ + return _mm512_storeu_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */ + +void +__attribute__ ((noinline, noclone)) +store256u_ph (void *p, __m256h a) +{ + return _mm256_storeu_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */ + +void +__attribute__ ((noinline, noclone)) +storeu_ph (void *p, __m128h a) +{ + return _mm_storeu_ph (p, a); +} + +/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */ + +__m512h +__attribute__ ((noinline, noclone)) +abs512_ph (__m512h a) +{ + return _mm512_abs_ph (a); +} + +/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpandd\[^\n\]*%zmm\[0-9\]+" 1 } } */ + +__m256h +__attribute__ ((noinline, noclone)) +abs256_ph (__m256h a) +{ + return _mm256_abs_ph (a); +} + +/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +__m128h +__attribute__ ((noinline, noclone)) +abs_ph (__m128h a) +{ + return _mm_abs_ph (a); +} + +/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */ |