aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config.gcc2
-rw-r--r--gcc/config/i386/avx512fp16intrin.h251
-rw-r--r--gcc/config/i386/avx512fp16vlintrin.h219
-rw-r--r--gcc/config/i386/i386-builtin-types.def7
-rw-r--r--gcc/config/i386/i386-builtin.def20
-rw-r--r--gcc/config/i386/i386-expand.c5
-rw-r--r--gcc/config/i386/immintrin.h2
-rw-r--r--gcc/config/i386/sse.md61
-rw-r--r--gcc/config/i386/subst.md6
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-1.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-11a.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-11b.c75
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vlfp16-11b.c96
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c6
19 files changed, 871 insertions, 27 deletions
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 710f9ce..ccf41f6 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -416,7 +416,7 @@ i[34567]86-*-* | x86_64-*-*)
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h keylockerintrin.h avxvnniintrin.h
- mwaitintrin.h avx512fp16intrin.h"
+ mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 3fc0770..3e9d676 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -217,6 +217,257 @@ _mm_store_sh (void *__P, __m128h __A)
*(_Float16 *) __P = ((__v8hf)__A)[0];
}
+/* Intrinsics v[add,sub,mul,div]ph. */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A + (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_vaddph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_vaddph_v32hf_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A - (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_vsubph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_vsubph_v32hf_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A * (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_vmulph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_vmulph_v32hf_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_ph (__m512h __A, __m512h __B)
+{
+ return (__m512h) ((__v32hf) __A / (__v32hf) __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_vdivph_v32hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_vdivph_v32hf_mask (__B, __C,
+ _mm512_setzero_ph (), __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_vaddph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_vaddph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_vaddph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_vsubph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_vsubph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_vsubph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_vmulph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_vmulph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_vmulph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_vdivph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_vdivph_v32hf_mask_round (__C, __D, __A, __B, __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_vdivph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+#else
+#define _mm512_add_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((A), (B), \
+ _mm512_setzero_ph (),\
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_add_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_add_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_vaddph_v32hf_mask_round((B), (C), \
+ _mm512_setzero_ph (),\
+ (A), (D)))
+
+#define _mm512_sub_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((A), (B), \
+ _mm512_setzero_ph (),\
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_sub_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_sub_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_vsubph_v32hf_mask_round((B), (C), \
+ _mm512_setzero_ph (),\
+ (A), (D)))
+
+#define _mm512_mul_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((A), (B), \
+ _mm512_setzero_ph (),\
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_mul_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_mul_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_vmulph_v32hf_mask_round((B), (C), \
+ _mm512_setzero_ph (),\
+ (A), (D)))
+
+#define _mm512_div_round_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((A), (B), \
+ _mm512_setzero_ph (),\
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_div_round_ph(A, B, C, D, E) \
+ ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_div_round_ph(A, B, C, D) \
+ ((__m512h)__builtin_ia32_vdivph_v32hf_mask_round((B), (C), \
+ _mm512_setzero_ph (),\
+ (A), (D)))
+#endif /* __OPTIMIZE__ */
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
new file mode 100644
index 0000000..75fa9eb
--- /dev/null
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -0,0 +1,219 @@
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
+#define __AVX512FP16VLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
+#pragma GCC push_options
+#pragma GCC target("avx512fp16,avx512vl")
+#define __DISABLE_AVX512FP16VL__
+#endif /* __AVX512FP16VL__ */
+
+/* Intrinsics v[add,sub,mul,div]ph. */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ph (__m128h __A, __m128h __B)
+{
+ return (__m128h) ((__v8hf) __A + (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ph (__m256h __A, __m256h __B)
+{
+ return (__m256h) ((__v16hf) __A + (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vaddph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+ return __builtin_ia32_vaddph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vaddph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+ __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+ return __builtin_ia32_vaddph_v16hf_mask (__B, __C,
+ _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ph (__m128h __A, __m128h __B)
+{
+ return (__m128h) ((__v8hf) __A - (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ph (__m256h __A, __m256h __B)
+{
+ return (__m256h) ((__v16hf) __A - (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vsubph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+ return __builtin_ia32_vsubph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vsubph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+ __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+ return __builtin_ia32_vsubph_v16hf_mask (__B, __C,
+ _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ph (__m128h __A, __m128h __B)
+{
+ return (__m128h) ((__v8hf) __A * (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ph (__m256h __A, __m256h __B)
+{
+ return (__m256h) ((__v16hf) __A * (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vmulph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+ return __builtin_ia32_vmulph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vmulph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+ __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+ return __builtin_ia32_vmulph_v16hf_mask (__B, __C,
+ _mm256_setzero_ph (), __A);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ph (__m128h __A, __m128h __B)
+{
+ return (__m128h) ((__v8hf) __A / (__v8hf) __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ph (__m256h __A, __m256h __B)
+{
+ return (__m256h) ((__v16hf) __A / (__v16hf) __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vdivph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
+{
+ return __builtin_ia32_vdivph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vdivph_v8hf_mask (__B, __C, _mm_setzero_ph (),
+ __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+ return __builtin_ia32_vdivph_v16hf_mask (__B, __C,
+ _mm256_setzero_ph (), __A);
+}
+
+#ifdef __DISABLE_AVX512FP16VL__
+#undef __DISABLE_AVX512FP16VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512FP16VL__ */
+
+#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 4df6ee1..fdc46bd 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -98,6 +98,7 @@ DEF_VECTOR_TYPE (V16UQI, UQI, V16QI)
# AVX vectors
DEF_VECTOR_TYPE (V4DF, DOUBLE)
DEF_VECTOR_TYPE (V8SF, FLOAT)
+DEF_VECTOR_TYPE (V16HF, FLOAT16)
DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI)
@@ -108,6 +109,7 @@ DEF_VECTOR_TYPE (V16UHI, UHI, V16HI)
# AVX512F vectors
DEF_VECTOR_TYPE (V32SF, FLOAT)
+DEF_VECTOR_TYPE (V32HF, FLOAT16)
DEF_VECTOR_TYPE (V16SF, FLOAT)
DEF_VECTOR_TYPE (V8DF, DOUBLE)
DEF_VECTOR_TYPE (V8DI, DI)
@@ -1302,3 +1304,8 @@ DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID)
# FP16 builtins
DEF_FUNCTION_TYPE (V8HF, V8HI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF, V8HF, UQI)
+DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, V16HF, UHI)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI)
+DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, V32HF, USI, INT)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 4b1ae0e..2f152096 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2774,6 +2774,20 @@ BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builti
BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
+/* AVX512FP16. */
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_vaddph_v8hf_mask", IX86_BUILTIN_VADDPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_vaddph_v16hf_mask", IX86_BUILTIN_VADDPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_vaddph_v32hf_mask", IX86_BUILTIN_VADDPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_vsubph_v8hf_mask", IX86_BUILTIN_VSUBPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_vsubph_v16hf_mask", IX86_BUILTIN_VSUBPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_vsubph_v32hf_mask", IX86_BUILTIN_VSUBPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_vmulph_v8hf_mask", IX86_BUILTIN_VMULPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_vmulph_v16hf_mask", IX86_BUILTIN_VMULPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_vmulph_v32hf_mask", IX86_BUILTIN_VMULPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_vdivph_v8hf_mask", IX86_BUILTIN_VDIVPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_vdivph_v16hf_mask", IX86_BUILTIN_VDIVPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_vdivph_v32hf_mask", IX86_BUILTIN_VDIVPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -2973,6 +2987,12 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_truncv8dfv8di2_mask_round, "
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
+/* AVX512FP16. */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_vaddph_v32hf_mask_round", IX86_BUILTIN_VADDPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_vsubph_v32hf_mask_round", IX86_BUILTIN_VSUBPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_vmulph_v32hf_mask_round", IX86_BUILTIN_VMULPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_vdivph_v32hf_mask_round", IX86_BUILTIN_VDIVPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+
BDESC_END (ROUND_ARGS, MULTI_ARG)
/* FMA4 and XOP. */
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index badbacc..ad9c672 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -10038,6 +10038,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
+ case V32HF_FTYPE_V32HF_V32HF_V32HF_USI:
case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
@@ -10055,6 +10056,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
+ case V16HF_FTYPE_V16HF_V16HF_V16HF_UHI:
case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
@@ -10062,6 +10064,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
+ case V8HF_FTYPE_V8HF_V8HF_V8HF_UQI:
case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
@@ -10738,6 +10741,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case INT_FTYPE_V4SF_INT:
nargs = 2;
break;
+ case V32HF_FTYPE_V32HF_V32HF_INT:
case V4SF_FTYPE_V4SF_UINT_INT:
case V4SF_FTYPE_V4SF_UINT64_INT:
case V2DF_FTYPE_V2DF_UINT64_INT:
@@ -10778,6 +10782,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
+ case V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT:
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 2421a78..1761c75 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -96,6 +96,8 @@
#ifdef __SSE2__
#include <avx512fp16intrin.h>
+
+#include <avx512fp16vlintrin.h>
#endif
#include <shaintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9c67750..0633916 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -298,6 +298,13 @@
[(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+(define_mode_iterator VFH
+ [(V32HF "TARGET_AVX512FP16")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+
;; 128- and 256-bit float vector modes
(define_mode_iterator VF_128_256
[(V8SF "TARGET_AVX") V4SF
@@ -321,6 +328,13 @@
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+;; All DFmode & HFmode vector float modes
+(define_mode_iterator VF2H
+ [(V32HF "TARGET_AVX512FP16")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+
;; 128- and 256-bit DF vector modes
(define_mode_iterator VF2_128_256
[(V4DF "TARGET_AVX") V2DF])
@@ -885,6 +899,7 @@
(V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
(V16SI "QI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
+ (V32HF "HI") (V16HF "QI") (V8HF "QI")
(V16SF "QI") (V8SF "QI") (V4SF "QI")
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
@@ -2032,18 +2047,18 @@
})
(define_expand "<insn><mode>3<mask_name><round_name>"
- [(set (match_operand:VF 0 "register_operand")
- (plusminus:VF
- (match_operand:VF 1 "<round_nimm_predicate>")
- (match_operand:VF 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VFH 0 "register_operand")
+ (plusminus:VFH
+ (match_operand:VFH 1 "<round_nimm_predicate>")
+ (match_operand:VFH 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<insn><mode>3<mask_name><round_name>"
- [(set (match_operand:VF 0 "register_operand" "=x,v")
- (plusminus:VF
- (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
- (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+ [(set (match_operand:VFH 0 "register_operand" "=x,v")
+ (plusminus:VFH
+ (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
+ (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -2121,18 +2136,18 @@
})
(define_expand "mul<mode>3<mask_name><round_name>"
- [(set (match_operand:VF 0 "register_operand")
- (mult:VF
- (match_operand:VF 1 "<round_nimm_predicate>")
- (match_operand:VF 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VFH 0 "register_operand")
+ (mult:VFH
+ (match_operand:VFH 1 "<round_nimm_predicate>")
+ (match_operand:VFH 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3<mask_name><round_name>"
- [(set (match_operand:VF 0 "register_operand" "=x,v")
- (mult:VF
- (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
- (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+ [(set (match_operand:VFH 0 "register_operand" "=x,v")
+ (mult:VFH
+ (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "%0,v")
+ (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -2195,9 +2210,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_expand "div<mode>3"
- [(set (match_operand:VF2 0 "register_operand")
- (div:VF2 (match_operand:VF2 1 "register_operand")
- (match_operand:VF2 2 "vector_operand")))]
+ [(set (match_operand:VF2H 0 "register_operand")
+ (div:VF2H (match_operand:VF2H 1 "register_operand")
+ (match_operand:VF2H 2 "vector_operand")))]
"TARGET_SSE2")
(define_expand "div<mode>3"
@@ -2236,10 +2251,10 @@
})
(define_insn "<sse>_div<mode>3<mask_name><round_name>"
- [(set (match_operand:VF 0 "register_operand" "=x,v")
- (div:VF
- (match_operand:VF 1 "register_operand" "0,v")
- (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
+ [(set (match_operand:VFH 0 "register_operand" "=x,v")
+ (div:VFH
+ (match_operand:VFH 1 "register_operand" "0,v")
+ (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
div<ssemodesuffix>\t{%2, %0|%0, %2}
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 6614e04..94426a5 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -24,6 +24,7 @@
V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
+ V32HF V16HF V8HF
V16SF V8SF V4SF
V8DF V4DF V2DF])
@@ -35,6 +36,7 @@
V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
+ V32HF V16HF V8HF
V16SF V8SF V4SF
V8DF V4DF V2DF
QI HI SI DI SF DF])
@@ -161,7 +163,9 @@
(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode
|| <MODE>mode == V8DFmode
|| <MODE>mode == V8DImode
- || <MODE>mode == V16SImode)")
+ || <MODE>mode == V16SImode
+ || <MODE>mode == V32HFmode)")
+
(define_subst_attr "round_modev8sf_condition" "round" "1" "(<MODE>mode == V8SFmode)")
(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "")
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index f367607..1eaee86 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -maes -mpclmul -mgfni -mavx512bw -mavx512fp16 -mavx512vl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
@@ -685,6 +685,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/avx-2.c b/gcc/testsuite/gcc.target/i386/avx-2.c
index 1751c52..642ae4d 100644
--- a/gcc/testsuite/gcc.target/i386/avx-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mavx2 -msse4a -maes -mpclmul -mavx512bw -mavx512fp16 -mavx512vl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-11a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-11a.c
new file mode 100644
index 0000000..28492fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-11a.c
@@ -0,0 +1,36 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <immintrin.h>
+__m512h
+__attribute__ ((noinline, noclone))
+vadd512 (__m512h a, __m512h b)
+{
+ return a + b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vsub512 (__m512h a, __m512h b)
+{
+ return a - b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vmul512 (__m512h a, __m512h b)
+{
+ return a * b;
+}
+
+__m512h
+__attribute__ ((noinline, noclone))
+vdiv512 (__m512h a, __m512h b)
+{
+ return a / b;
+}
+
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%zmm\[01\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-11b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-11b.c
new file mode 100644
index 0000000..5f51a5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-11b.c
@@ -0,0 +1,75 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mfpmath=sse" } */
+
+#include <string.h>
+#include <stdlib.h>
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512fp16-11a.c"
+
+/* Get random float16 between -50.x to 50.x. */
+_Float16
+get_float16_noround()
+{
+ return ((int) (100.0 * rand ()/ (RAND_MAX + 1.0)) - 50)
+ + 0.1f * (int) (10 * rand() / (RAND_MAX + 1.0));
+}
+
+static void
+do_test (void)
+{
+ _Float16 x[32];
+ _Float16 y[32];
+ _Float16 res_add[32];
+ _Float16 res_sub[32];
+ _Float16 res_mul[32];
+ _Float16 res_div[32];
+ for (int i = 0 ; i != 32; i++)
+ {
+ x[i] = get_float16_noround ();
+ y[i] = get_float16_noround ();
+ if (y[i] == 0)
+ y[i] = 1.0f;
+ res_add[i] = x[i] + y[i];
+ res_sub[i] = x[i] - y[i];
+ res_mul[i] = x[i] * y[i];
+ res_div[i] = x[i] / y[i];
+
+ }
+
+ union512h u512 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15],
+ x[16], x[17], x[18], x[19], x[20], x[21], x[22], x[23],
+ x[24], x[25], x[26], x[27], x[28], x[29], x[30], x[31] };
+ union512h u512_1 = {y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
+ y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15],
+ y[16], y[17], y[18], y[19], y[20], y[21], y[22], y[23],
+ y[24], y[25], y[26], y[27], y[28], y[29], y[30], y[31] };
+
+ __m512h v512;
+ union512h a512;
+
+ memset (&v512, -1, sizeof (v512));
+ v512 = vadd512 (u512.x, u512_1.x);
+ a512.x = v512;
+ if (check_union512h (a512, res_add))
+ abort ();
+ memset (&v512, -1, sizeof (v512));
+ v512 = vsub512 (u512.x, u512_1.x);
+ a512.x = v512;
+ if (check_union512h (a512, res_sub))
+ abort ();
+ memset (&v512, -1, sizeof (v512));
+ v512 = vmul512 (u512.x, u512_1.x);
+ a512.x = v512;
+ if (check_union512h (a512, res_mul))
+ abort ();
+ memset (&v512, -1, sizeof (v512));
+ v512 = vdiv512 (u512.x, u512_1.x);
+ a512.x = v512;
+ if (check_union512h (a512, res_div))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c b/gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c
new file mode 100644
index 0000000..a8c6296
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlfp16-11a.c
@@ -0,0 +1,68 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <immintrin.h>
+__m128h
+__attribute__ ((noinline, noclone))
+vadd128 (__m128h a, __m128h b)
+{
+ return a + b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vadd256 (__m256h a, __m256h b)
+{
+ return a + b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vsub128 (__m128h a, __m128h b)
+{
+ return a - b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vsub256 (__m256h a, __m256h b)
+{
+ return a - b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vmul128 (__m128h a, __m128h b)
+{
+ return a * b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vmul256 (__m256h a, __m256h b)
+{
+ return a * b;
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+vdiv128 (__m128h a, __m128h b)
+{
+ return a / b;
+}
+
+__m256h
+__attribute__ ((noinline, noclone))
+vdiv256 (__m256h a, __m256h b)
+{
+ return a / b;
+}
+
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%xmm\[01\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivph\[ \\t\]+\[^\n\r\]*%ymm\[01\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlfp16-11b.c b/gcc/testsuite/gcc.target/i386/avx512vlfp16-11b.c
new file mode 100644
index 0000000..b8d3e8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlfp16-11b.c
@@ -0,0 +1,96 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <string.h>
+#include <stdlib.h>
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+#include "avx512-check.h"
+#include "avx512vlfp16-11a.c"
+
+/* Get random float16 between -50.x to 50.x. */
+_Float16
+get_float16_noround()
+{
+ return ((int) (100.0 * rand ()/ (RAND_MAX + 1.0)) - 50)
+ + 0.1f * (int) (10 * rand() / (RAND_MAX + 1.0));
+}
+
+static void
+do_test (void)
+{
+ _Float16 x[16];
+ _Float16 y[16];
+ _Float16 res_add[16];
+ _Float16 res_sub[16];
+ _Float16 res_mul[16];
+ _Float16 res_div[16];
+ for (int i = 0 ; i != 16; i++)
+ {
+ x[i] = get_float16_noround ();
+ y[i] = get_float16_noround ();
+ if (y[i] == 0)
+ y[i] = 1.0f;
+ res_add[i] = x[i] + y[i];
+ res_sub[i] = x[i] - y[i];
+ res_mul[i] = x[i] * y[i];
+ res_div[i] = x[i] / y[i];
+
+ }
+
+ union128h u128 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7] };
+ union128h u128_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7] };
+ union256h u256 = { x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7],
+ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] };
+ union256h u256_1 = { y[0], y[1], y[2], y[3], y[4], y[5], y[6], y[7],
+ y[8], y[9], y[10], y[11], y[12], y[13], y[14], y[15]};
+
+ __m128h v128;
+ __m256h v256;
+ union128h a128;
+ union256h a256;
+
+ memset (&v128, -1, sizeof (v128));
+ v128 = vadd128 (u128.x, u128_1.x);
+ a128.x = v128;
+ if (check_union128h (a128, res_add))
+ abort ();
+ memset (&v128, -1, sizeof (v128));
+ v128 = vsub128 (u128.x, u128_1.x);
+ a128.x = v128;
+ if (check_union128h (a128, res_sub))
+ abort ();
+ memset (&v128, -1, sizeof (v128));
+ v128 = vmul128 (u128.x, u128_1.x);
+ a128.x = v128;
+ if (check_union128h (a128, res_mul))
+ abort ();
+ memset (&v128, -1, sizeof (v128));
+ v128 = vdiv128 (u128.x, u128_1.x);
+ a128.x = v128;
+ if (check_union128h (a128, res_div))
+ abort ();
+
+ memset (&v256, -1, sizeof (v256));
+ v256 = vadd256 (u256.x, u256_1.x);
+ a256.x = v256;
+ if (check_union256h (a256, res_add))
+ abort ();
+ memset (&v256, -1, sizeof (v256));
+ v256 = vsub256 (u256.x, u256_1.x);
+ a256.x = v256;
+ if (check_union256h (a256, res_sub))
+ abort ();
+ memset (&v256, -1, sizeof (v256));
+ v256 = vmul256 (u256.x, u256_1.x);
+ a256.x = v256;
+ if (check_union256h (a256, res_mul))
+ abort ();
+ memset (&v256, -1, sizeof (v256));
+ v256 = vdiv256 (u256.x, u256_1.x);
+ a256.x = v256;
+ if (check_union256h (a256, res_div))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index f5f5c11..50ed74c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -702,6 +702,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 747d504..26a5e94 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -667,6 +667,20 @@ test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 8)
test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 8)
test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 8)
+/* avx512fp16intrin.h */
+test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+
/* shaintrin.h */
test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 3341196..8d25eff 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -772,6 +772,20 @@ test_2 (_mm_rcp28_round_ss, __m128, __m128, __m128, 8)
test_2 (_mm_rsqrt28_round_sd, __m128d, __m128d, __m128d, 8)
test_2 (_mm_rsqrt28_round_ss, __m128, __m128, __m128, 8)
+/* avx512fp16intrin.h */
+test_2 (_mm512_add_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_sub_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_mul_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm512_div_round_ph, __m512h, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_add_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_sub_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_mul_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm512_maskz_div_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_sub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_mul_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm512_mask_div_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+
/* shaintrin.h */
test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 86590ca..f7dd5d7 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -703,6 +703,12 @@
#define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1)
#define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E)
+/* avx512fp16intrin.h */
+#define __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vaddph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vsubph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vmulph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vdivph_v32hf_mask_round(A, B, C, D, 8)
+
/* vpclmulqdqintrin.h */
#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)