aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2020-02-25 10:42:13 +0800
committerliuhongt <hongtao.liu@intel.com>2021-09-17 16:04:29 +0800
commita5873aadb6dd749558924eba3e1b4c21755721ba (patch)
tree751cac997be70046d5706b6032a68d08e929b862 /gcc
parent1ef291e68fae9c9f5645a88ef4b8ead4c0ae8a9a (diff)
downloadgcc-a5873aadb6dd749558924eba3e1b4c21755721ba.zip
gcc-a5873aadb6dd749558924eba3e1b4c21755721ba.tar.gz
gcc-a5873aadb6dd749558924eba3e1b4c21755721ba.tar.bz2
AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer.
gcc/ChangeLog: * config/i386/avx512fp16intrin.h (_mm_undefined_ph): New intrinsic. (_mm256_undefined_ph): Likewise. (_mm512_undefined_ph): Likewise. (_mm_cvtsh_h): Likewise. (_mm256_cvtsh_h): Likewise. (_mm512_cvtsh_h): Likewise. (_mm512_castph_ps): Likewise. (_mm512_castph_pd): Likewise. (_mm512_castph_si512): Likewise. (_mm512_castph512_ph128): Likewise. (_mm512_castph512_ph256): Likewise. (_mm512_castph128_ph512): Likewise. (_mm512_castph256_ph512): Likewise. (_mm512_zextph128_ph512): Likewise. (_mm512_zextph256_ph512): Likewise. (_mm512_castps_ph): Likewise. (_mm512_castpd_ph): Likewise. (_mm512_castsi512_ph): Likewise. * config/i386/avx512fp16vlintrin.h (_mm_castph_ps): New intrinsic. (_mm256_castph_ps): Likewise. (_mm_castph_pd): Likewise. (_mm256_castph_pd): Likewise. (_mm_castph_si128): Likewise. (_mm256_castph_si256): Likewise. (_mm_castps_ph): Likewise. (_mm256_castps_ph): Likewise. (_mm_castpd_ph): Likewise. (_mm256_castpd_ph): Likewise. (_mm_castsi128_ph): Likewise. (_mm256_castsi256_ph): Likewise. (_mm256_castph256_ph128): Likewise. (_mm256_castph128_ph256): Likewise. (_mm256_zextph128_ph256): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-typecast-1.c: New test. * gcc.target/i386/avx512fp16-typecast-2.c: Ditto. * gcc.target/i386/avx512fp16vl-typecast-1.c: Ditto. * gcc.target/i386/avx512fp16vl-typecast-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/avx512fp16intrin.h153
-rw-r--r--gcc/config/i386/avx512fp16vlintrin.h117
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c37
6 files changed, 449 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 3b236ec..a5041ed 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -192,6 +192,159 @@ _mm512_setzero_ph (void)
return _mm512_set1_ph (0.0f);
}
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ph (void)
+{
+ __m128h __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ph (void)
+{
+ __m256h __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ph (void)
+{
+ __m512h __Y = __Y;
+ return __Y;
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_h (__m128h __A)
+{
+ return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsh_h (__m256h __A)
+{
+ return __A[0];
+}
+
+extern __inline _Float16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsh_h (__m512h __A)
+{
+ return __A[0];
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_ps (__m512h __a)
+{
+ return (__m512) __a;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_pd (__m512h __a)
+{
+ return (__m512d) __a;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph_si512 (__m512h __a)
+{
+ return (__m512i) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph128 (__m512h __A)
+{
+ union
+ {
+ __m128h a[4];
+ __m512h v;
+ } u = { .v = __A };
+ return u.a[0];
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph512_ph256 (__m512h __A)
+{
+ union
+ {
+ __m256h a[2];
+ __m512h v;
+ } u = { .v = __A };
+ return u.a[0];
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph128_ph512 (__m128h __A)
+{
+ union
+ {
+ __m128h a[4];
+ __m512h v;
+ } u;
+ u.a[0] = __A;
+ return u.v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castph256_ph512 (__m256h __A)
+{
+ union
+ {
+ __m256h a[2];
+ __m512h v;
+ } u;
+ u.a[0] = __A;
+ return u.v;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph128_ph512 (__m128h __A)
+{
+ return (__m512h) _mm512_insertf32x4 (_mm512_setzero_ps (),
+ (__m128) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_zextph256_ph512 (__m256h __A)
+{
+ return (__m512h) _mm512_insertf64x4 (_mm512_setzero_pd (),
+ (__m256d) __A, 0);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castps_ph (__m512 __a)
+{
+ return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castpd_ph (__m512d __a)
+{
+ return (__m512h) __a;
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_castsi512_ph (__m512i __a)
+{
+ return (__m512h) __a;
+}
+
/* Create a vector with element 0 as F and the rest zero. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 59128fd..59906d2 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -34,6 +34,123 @@
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_ps (__m128h __a)
+{
+ return (__m128) __a;
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_ps (__m256h __a)
+{
+ return (__m256) __a;
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_pd (__m128h __a)
+{
+ return (__m128d) __a;
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_pd (__m256h __a)
+{
+ return (__m256d) __a;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castph_si128 (__m128h __a)
+{
+ return (__m128i) __a;
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph_si256 (__m256h __a)
+{
+ return (__m256i) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_ph (__m128 __a)
+{
+ return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_ph (__m256 __a)
+{
+ return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ph (__m128d __a)
+{
+ return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ph (__m256d __a)
+{
+ return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ph (__m128i __a)
+{
+ return (__m128h) __a;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ph (__m256i __a)
+{
+ return (__m256h) __a;
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph256_ph128 (__m256h __A)
+{
+ union
+ {
+ __m128h a[2];
+ __m256h v;
+ } u = { .v = __A };
+ return u.a[0];
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castph128_ph256 (__m128h __A)
+{
+ union
+ {
+ __m128h a[2];
+ __m256h v;
+ } u;
+ u.a[0] = __A;
+ return u.v;
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zextph128_ph256 (__m128h __A)
+{
+ return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (),
+ (__m128) __A, 0);
+}
+
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
new file mode 100644
index 0000000..cf0cc74
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-1.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void)
+{
+ V512 res;
+
+ res.ymmh[0] = _mm512_castph512_ph256 (src1.zmmh);
+ check_results (&res, &src1, 16, "_mm512_castph512_ph256");
+
+ res.xmmh[0] = _mm512_castph512_ph128 (src1.zmmh);
+ check_results (&res, &src1, 8, "_mm512_castph512_ph128");
+
+ res.zmmh = _mm512_castph256_ph512 (src1.ymmh[0]);
+ check_results (&res, &src1, 16, "_mm512_castph256_ph512");
+
+ res.zmmh = _mm512_castph128_ph512 (src1.xmmh[0]);
+ check_results (&res, &src1, 8, "_mm512_castph128_ph512");
+
+ res.zmm = _mm512_castph_ps (src1.zmmh);
+ check_results (&res, &src1, 32, "_mm512_castph_ps");
+
+ res.zmmd = _mm512_castph_pd (src1.zmmh);
+ check_results (&res, &src1, 32, "_mm512_castph_pd");
+
+ res.zmmi = _mm512_castph_si512 (src1.zmmh);
+ check_results (&res, &src1, 32, "_mm512_castph_si512");
+
+ res.zmmh = _mm512_castps_ph (src1.zmm);
+ check_results (&res, &src1, 32, "_mm512_castps_ph");
+
+ res.zmmh = _mm512_castpd_ph (src1.zmmd);
+ check_results (&res, &src1, 32, "_mm512_castpd_ph");
+
+ res.zmmh = _mm512_castsi512_ph (src1.zmmi);
+ check_results (&res, &src1, 32, "_mm512_castsi512_ph");
+
+ if (n_errs != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
new file mode 100644
index 0000000..a29f1db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-typecast-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void)
+{
+ union512i_d zero;
+ union512h ad;
+ union256h b,bd;
+ union128h c;
+
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ b.a[i] = 65.43f + i;
+ zero.a[i] = 0;
+ }
+
+ for (i = 0; i < 8; i++)
+ {
+ c.a[i] = 32.01f + i;
+ }
+
+ ad.x = _mm512_zextph256_ph512 (b.x);
+ if (memcmp (ad.a, b.a, 32)
+ || memcmp (&ad.a[16], &zero.a, 32))
+ abort ();
+
+ ad.x = _mm512_zextph128_ph512 (c.x);
+ if (memcmp (ad.a, c.a, 16)
+ || memcmp (&ad.a[8], &zero.a, 48))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
new file mode 100644
index 0000000..3621bb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-1.c
@@ -0,0 +1,55 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+void
+test_512 (void)
+{
+ V512 res;
+ res.xmm[0] = _mm_castph_ps (src1.xmmh[0]);
+ check_results (&res, &src1, 8, "_mm_castph_ps");
+
+ res.xmmd[0] = _mm_castph_pd (src1.xmmh[0]);
+ check_results (&res, &src1, 8, "_mm_castph_pd");
+
+ res.xmmi[0] = _mm_castph_si128 (src1.xmmh[0]);
+ check_results (&res, &src1, 8, "_mm_castph_si128");
+
+ res.xmmh[0] = _mm_castps_ph (src1.xmm[0]);
+ check_results (&res, &src1, 8, "_mm_castps_ph");
+
+ res.xmmh[0] = _mm_castpd_ph (src1.xmmd[0]);
+ check_results (&res, &src1, 8, "_mm_castpd_ph");
+
+ res.xmmh[0] = _mm_castsi128_ph (src1.xmmi[0]);
+ check_results (&res, &src1, 8, "_mm_castsi128_ph");
+
+ res.ymm[0] = _mm256_castph_ps (src1.ymmh[0]);
+ check_results (&res, &src1, 16, "_mm256_castph_ps");
+
+ res.ymmd[0] = _mm256_castph_pd (src1.ymmh[0]);
+ check_results (&res, &src1, 16, "_mm256_castph_pd");
+
+ res.ymmi[0] = _mm256_castph_si256 (src1.ymmh[0]);
+ check_results (&res, &src1, 16, "_mm256_castph_si256");
+
+ res.ymmh[0] = _mm256_castps_ph (src1.ymm[0]);
+ check_results (&res, &src1, 16, "_mm256_castps_ph");
+
+ res.ymmh[0] = _mm256_castpd_ph (src1.ymmd[0]);
+ check_results (&res, &src1, 16, "_mm256_castpd_ph");
+
+ res.ymmh[0] = _mm256_castsi256_ph (src1.ymmi[0]);
+ check_results (&res, &src1, 16, "_mm256_castsi256_ph");
+
+ res.xmmh[0] = _mm256_castph256_ph128 (src1.ymmh[0]);
+ check_results (&res, &src1, 8, "_mm256_castph256_ph128");
+
+ res.ymmh[0] = _mm256_castph128_ph256 (src1.xmmh[0]);
+ check_results (&res, &src1, 8, "_mm256_castph128_ph256");
+
+ if (n_errs != 0)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
new file mode 100644
index 0000000..dce387f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-typecast-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512f-check.h"
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+void
+do_test (void)
+{
+ union512i_d zero;
+ union512h ad;
+ union256h b,bd;
+ union128h c;
+
+ int i;
+
+ for (i = 0; i < 16; i++)
+ {
+ b.a[i] = 65.43f + i;
+ zero.a[i] = 0;
+ }
+
+ for (i = 0; i < 8; i++)
+ {
+ c.a[i] = 32.01f + i;
+ }
+
+ bd.x = _mm256_zextph128_ph256 (c.x);
+ if (memcmp (bd.a, c.a, 16)
+ || memcmp (&bd.a[8], &zero.a, 16))
+ abort ();
+}