 35 files changed, 2514 insertions(+), 2 deletions(-)
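The patch below adds bf16 add/sub/mul/div, min/max, scalef and FMA intrinsics in unmasked, merge-masked (_mask) and zero-masked (_maskz) forms. A minimal usage sketch (illustrative only, not part of the patch; the function names are made up, but the intrinsics and types come from the headers added below; compile with -mavx10.2, or -mavx10.2-512 for the 512-bit forms):

#include <immintrin.h>

/* r = a * b + c in bf16 with round-to-nearest-even; lanes whose
   bit is clear in m are zeroed (the _maskz form).  */
__m256bh
bf16_masked_fma (__m256bh a, __m256bh b, __m256bh c, __mmask16 m)
{
  return _mm256_maskz_fmaddne_pbh (m, a, b, c);
}

/* Unmasked bf16 add of two 512-bit vectors (needs -mavx10.2-512).  */
__m512bh
bf16_add512 (__m512bh a, __m512bh b)
{
  return _mm512_addne_pbh (a, b);
}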
diff --git a/gcc/config.gcc b/gcc/config.gcc index 5e9c36a..7d761b2 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -454,7 +454,7 @@ i[34567]86-*-* | x86_64-*-*) sm3intrin.h sha512intrin.h sm4intrin.h usermsrintrin.h avx10_2roundingintrin.h avx10_2mediaintrin.h avx10_2-512mediaintrin.h - avx10_2convertintrin.h avx10_2-512convertintrin.h" + avx10_2bf16intrin.h avx10_2-512bf16intrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h new file mode 100644 index 0000000..b409ea1 --- /dev/null +++ b/gcc/config/i386/avx10_2-512bf16intrin.h @@ -0,0 +1,364 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED +#define _AVX10_2_512BF16INTRIN_H_INCLUDED + +#if !defined (__AVX10_2_512__) +#pragma GCC push_options +#pragma GCC target("avx10.2-512") +#define __DISABLE_AVX10_2_512__ +#endif /* __AVX10_2_512__ */ + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_addne_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_addnepbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_addne_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addnepbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_addne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addnepbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_subne_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_subnepbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_subne_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subnepbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_subne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subnepbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern 
__inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mulne_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_mulnepbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mulne_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulnepbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mulne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulnepbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_divne_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_divnepbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_divne_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divnepbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_divne_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divnepbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_maxpbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxpbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxpbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_minpbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minpbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minpbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_scalefpbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefpbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefpbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmaddne_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddnepbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmaddne_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmaddnepbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmaddne_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddnepbf16512_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsubne_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubnepbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsubne_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmsubnepbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsubne_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubnepbf16512_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmaddne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmaddne_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddnepbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmaddne_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmaddnepbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmaddne_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddnepbf16512_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm512_fnmsubne_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsubne_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubnepbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsubne_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmsubnepbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsubne_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubnepbf16512_maskz (__A, __B, __C, __U); +} + +#ifdef __DISABLE_AVX10_2_512__ +#undef __DISABLE_AVX10_2_512__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_512__ */ + +#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h new file mode 100644 index 0000000..e16f1b6 --- /dev/null +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -0,0 +1,685 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <avx10_2bf16intrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AVX10_2BF16INTRIN_H_INCLUDED +#define _AVX10_2BF16INTRIN_H_INCLUDED + +#if !defined(__AVX10_2_256__) +#pragma GCC push_options +#pragma GCC target("avx10.2") +#define __DISABLE_AVX10_2_256__ +#endif /* __AVX10_2_256__ */ + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_addne_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_addnepbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_addne_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_addnepbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_addne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_addnepbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_addne_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_addnepbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_addne_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_addnepbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_addne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_addnepbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_subne_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_subnepbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_subne_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_subnepbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_subne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_subnepbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_subne_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_subnepbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_subne_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_subnepbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_subne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_subnepbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mulne_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_mulnepbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm256_mask_mulne_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_mulnepbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_mulne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_mulnepbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulne_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_mulnepbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_mulne_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_mulnepbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_mulne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_mulnepbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_divne_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_divnepbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_divne_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_divnepbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_divne_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_divnepbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_divne_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_divnepbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_divne_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_divnepbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_divne_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_divnepbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_max_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_maxpbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_max_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_maxpbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_max_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_maxpbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_maxpbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_max_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_maxpbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_maxpbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_min_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_minpbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_min_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_minpbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_min_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_minpbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_minpbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_min_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_minpbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_minpbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_scalef_pbh (__m256bh __A, __m256bh __B) +{ + return (__m256bh) __builtin_ia32_scalefpbf16256 (__A, __B); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_scalef_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_scalefpbf16256_mask (__A, __B, __W, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_scalef_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) +{ + return (__m256bh) + __builtin_ia32_scalefpbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_scalef_pbh (__m128bh __A, __m128bh __B) +{ + return (__m128bh) __builtin_ia32_scalefpbf16128 (__A, __B); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_scalefpbf16128_mask (__A, __B, __W, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_pbh 
(__mmask8 __U, __m128bh __A, __m128bh __B) +{ + return (__m128bh) + __builtin_ia32_scalefpbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmaddne_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fmaddnepbf16256_mask (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmaddne_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) +{ + return (__m256bh) + __builtin_ia32_fmaddnepbf16256_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmaddne_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fmaddnepbf16256_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmaddne_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fmaddnepbf16128_mask (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmaddne_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) +{ + return (__m128bh) + __builtin_ia32_fmaddnepbf16128_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmaddne_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fmaddnepbf16128_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmsubne_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) __builtin_ia32_fmsubnepbf16256_mask (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmsubne_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) +{ + return (__m256bh) + __builtin_ia32_fmsubnepbf16256_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmsubne_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fmsubnepbf16256_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +{ + return (__m128bh) + 
__builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsubne_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fmsubnepbf16128_mask (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsubne_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) +{ + return (__m128bh) + __builtin_ia32_fmsubnepbf16128_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsubne_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fmsubnepbf16128_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmaddne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, (__mmask16) -1); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmaddne_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmaddnepbf16256_mask (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmaddne_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) +{ + return (__m256bh) + __builtin_ia32_fnmaddnepbf16256_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmaddne_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmaddnepbf16256_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmaddne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, (__mmask8) -1); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmaddne_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmaddnepbf16128_mask (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmaddne_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) +{ + return (__m128bh) + __builtin_ia32_fnmaddnepbf16128_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmaddne_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmaddnepbf16128_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmsubne_pbh (__m256bh __A, __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, (__mmask16) -1); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmsubne_pbh (__m256bh __A, __mmask16 __U, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmsubnepbf16256_mask (__A, __B, __C, __U); +} + 
+extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmsubne_pbh (__m256bh __A, __m256bh __B, + __m256bh __C, __mmask16 __U) +{ + return (__m256bh) + __builtin_ia32_fnmsubnepbf16256_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m256bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmsubne_pbh (__mmask16 __U, __m256bh __A, + __m256bh __B, __m256bh __C) +{ + return (__m256bh) + __builtin_ia32_fnmsubnepbf16256_maskz (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsubne_pbh (__m128bh __A, __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, (__mmask8) -1); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsubne_pbh (__m128bh __A, __mmask8 __U, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmsubnepbf16128_mask (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsubne_pbh (__m128bh __A, __m128bh __B, + __m128bh __C, __mmask8 __U) +{ + return (__m128bh) + __builtin_ia32_fnmsubnepbf16128_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m128bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsubne_pbh (__mmask8 __U, __m128bh __A, + __m128bh __B, __m128bh __C) +{ + return (__m128bh) + __builtin_ia32_fnmsubnepbf16128_maskz (__A, __B, __C, __U); +} + +#ifdef __DISABLE_AVX10_2_256__ +#undef __DISABLE_AVX10_2_256__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX10_2_256__ */ + +#endif /* __AVX10_2BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 63b6584..f383842 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1474,3 +1474,12 @@ DEF_FUNCTION_TYPE (V64QI, V32HF, V32HF, V64QI, UDI) DEF_FUNCTION_TYPE (V16QI, V8HF, V16QI, UQI) DEF_FUNCTION_TYPE (V16QI, V16HF, V16QI, UHI) DEF_FUNCTION_TYPE (V32QI, V32HF, V32QI, USI) +DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF) +DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF) +DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF) +DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, USI) +DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, UHI) +DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, UQI) +DEF_FUNCTION_TYPE (V32BF, V32BF, V32BF, V32BF, USI) +DEF_FUNCTION_TYPE (V16BF, V16BF, V16BF, V16BF, UHI) +DEF_FUNCTION_TYPE (V8BF, V8BF, V8BF, V8BF, UQI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 6f5ab32..3f3bc76 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3159,6 +3159,84 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8sv32hf_mask, "__bui BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv8hf_mask, "__builtin_ia32_vcvthf82ph128_mask", IX86_BUILTIN_VCVTHF82PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V16QI_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv16hf_mask, "__builtin_ia32_vcvthf82ph256_mask", IX86_BUILTIN_VCVTHF82PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16QI_V16HF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvthf82phv32hf_mask, "__builtin_ia32_vcvthf82ph512_mask", IX86_BUILTIN_VCVTHF82PH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32QI_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_addnepbf16_v32bf, 
"__builtin_ia32_addnepbf16512", IX86_BUILTIN_ADDNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_addnepbf16_v32bf_mask, "__builtin_ia32_addnepbf16512_mask", IX86_BUILTIN_ADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v16bf, "__builtin_ia32_addnepbf16256", IX86_BUILTIN_ADDNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v16bf_mask, "__builtin_ia32_addnepbf16256_mask", IX86_BUILTIN_ADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v8bf, "__builtin_ia32_addnepbf16128", IX86_BUILTIN_ADDNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_addnepbf16_v8bf_mask, "__builtin_ia32_addnepbf16128_mask", IX86_BUILTIN_ADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_subnepbf16_v32bf, "__builtin_ia32_subnepbf16512", IX86_BUILTIN_SUBNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_subnepbf16_v32bf_mask, "__builtin_ia32_subnepbf16512_mask", IX86_BUILTIN_SUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v16bf, "__builtin_ia32_subnepbf16256", IX86_BUILTIN_SUBNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v16bf_mask, "__builtin_ia32_subnepbf16256_mask", IX86_BUILTIN_SUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v8bf, "__builtin_ia32_subnepbf16128", IX86_BUILTIN_SUBNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_subnepbf16_v8bf_mask, "__builtin_ia32_subnepbf16128_mask", IX86_BUILTIN_SUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mulnepbf16_v32bf, "__builtin_ia32_mulnepbf16512", IX86_BUILTIN_MULNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_mulnepbf16_v32bf_mask, "__builtin_ia32_mulnepbf16512_mask", IX86_BUILTIN_MULNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v16bf, "__builtin_ia32_mulnepbf16256", IX86_BUILTIN_MULNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v16bf_mask, "__builtin_ia32_mulnepbf16256_mask", IX86_BUILTIN_MULNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v8bf, "__builtin_ia32_mulnepbf16128", IX86_BUILTIN_MULNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_mulnepbf16_v8bf_mask, "__builtin_ia32_mulnepbf16128_mask", IX86_BUILTIN_MULNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_divnepbf16_v32bf, "__builtin_ia32_divnepbf16512", IX86_BUILTIN_DIVNEPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_divnepbf16_v32bf_mask, 
"__builtin_ia32_divnepbf16512_mask", IX86_BUILTIN_DIVNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v16bf, "__builtin_ia32_divnepbf16256", IX86_BUILTIN_DIVNEPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v16bf_mask, "__builtin_ia32_divnepbf16256_mask", IX86_BUILTIN_DIVNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v8bf, "__builtin_ia32_divnepbf16128", IX86_BUILTIN_DIVNEPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divnepbf16_v8bf_mask, "__builtin_ia32_divnepbf16128_mask", IX86_BUILTIN_DIVNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf, "__builtin_ia32_maxpbf16512", IX86_BUILTIN_MAXPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf_mask, "__builtin_ia32_maxpbf16512_mask", IX86_BUILTIN_MAXPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf, "__builtin_ia32_maxpbf16256", IX86_BUILTIN_MAXPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf_mask, "__builtin_ia32_maxpbf16256_mask", IX86_BUILTIN_MAXPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf, "__builtin_ia32_maxpbf16128", IX86_BUILTIN_MAXPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf_mask, "__builtin_ia32_maxpbf16128_mask", IX86_BUILTIN_MAXPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf, "__builtin_ia32_minpbf16512", IX86_BUILTIN_MINPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf_mask, "__builtin_ia32_minpbf16512_mask", IX86_BUILTIN_MINPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf, "__builtin_ia32_minpbf16256", IX86_BUILTIN_MINPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf_mask, "__builtin_ia32_minpbf16256_mask", IX86_BUILTIN_MINPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf, "__builtin_ia32_minpbf16128", IX86_BUILTIN_MINPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf_mask, "__builtin_ia32_minpbf16128_mask", IX86_BUILTIN_MINPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf, "__builtin_ia32_scalefpbf16512", IX86_BUILTIN_SCALEFPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf_mask, "__builtin_ia32_scalefpbf16512_mask", IX86_BUILTIN_SCALEFPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__builtin_ia32_scalefpbf16256", 
IX86_BUILTIN_SCALEFPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf_mask, "__builtin_ia32_scalefpbf16256_mask", IX86_BUILTIN_SCALEFPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf, "__builtin_ia32_scalefpbf16128", IX86_BUILTIN_SCALEFPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf_mask, "__builtin_ia32_scalefpbf16128_mask", IX86_BUILTIN_SCALEFPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask, "__builtin_ia32_fmaddnepbf16512_mask", IX86_BUILTIN_FMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_mask3, "__builtin_ia32_fmaddnepbf16512_mask3", IX86_BUILTIN_FMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddnepbf16_v32bf_maskz, "__builtin_ia32_fmaddnepbf16512_maskz", IX86_BUILTIN_FMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask, "__builtin_ia32_fmaddnepbf16256_mask", IX86_BUILTIN_FMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_mask3, "__builtin_ia32_fmaddnepbf16256_mask3", IX86_BUILTIN_FMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v16bf_maskz, "__builtin_ia32_fmaddnepbf16256_maskz", IX86_BUILTIN_FMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask, "__builtin_ia32_fmaddnepbf16128_mask", IX86_BUILTIN_FMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_mask3, "__builtin_ia32_fmaddnepbf16128_mask3", IX86_BUILTIN_FMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmaddnepbf16_v8bf_maskz, "__builtin_ia32_fmaddnepbf16128_maskz", IX86_BUILTIN_FMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask, "__builtin_ia32_fmsubnepbf16512_mask", IX86_BUILTIN_FMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_mask3, "__builtin_ia32_fmsubnepbf16512_mask3", IX86_BUILTIN_FMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmsubnepbf16_v32bf_maskz, "__builtin_ia32_fmsubnepbf16512_maskz", IX86_BUILTIN_FMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask, "__builtin_ia32_fmsubnepbf16256_mask", IX86_BUILTIN_FMSUBNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_mask3, "__builtin_ia32_fmsubnepbf16256_mask3", IX86_BUILTIN_FMSUBNEPBF16256_MASK3, UNKNOWN, (int) 
V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v16bf_maskz, "__builtin_ia32_fmsubnepbf16256_maskz", IX86_BUILTIN_FMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask, "__builtin_ia32_fmsubnepbf16128_mask", IX86_BUILTIN_FMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_mask3, "__builtin_ia32_fmsubnepbf16128_mask3", IX86_BUILTIN_FMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fmsubnepbf16_v8bf_maskz, "__builtin_ia32_fmsubnepbf16128_maskz", IX86_BUILTIN_FMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask, "__builtin_ia32_fnmaddnepbf16512_mask", IX86_BUILTIN_FNMADDNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_mask3, "__builtin_ia32_fnmaddnepbf16512_mask3", IX86_BUILTIN_FNMADDNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmaddnepbf16_v32bf_maskz, "__builtin_ia32_fnmaddnepbf16512_maskz", IX86_BUILTIN_FNMADDNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask, "__builtin_ia32_fnmaddnepbf16256_mask", IX86_BUILTIN_FNMADDNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_mask3, "__builtin_ia32_fnmaddnepbf16256_mask3", IX86_BUILTIN_FNMADDNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v16bf_maskz, "__builtin_ia32_fnmaddnepbf16256_maskz", IX86_BUILTIN_FNMADDNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask, "__builtin_ia32_fnmaddnepbf16128_mask", IX86_BUILTIN_FNMADDNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_mask3, "__builtin_ia32_fnmaddnepbf16128_mask3", IX86_BUILTIN_FNMADDNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmaddnepbf16_v8bf_maskz, "__builtin_ia32_fnmaddnepbf16128_maskz", IX86_BUILTIN_FNMADDNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask, "__builtin_ia32_fnmsubnepbf16512_mask", IX86_BUILTIN_FNMSUBNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_mask3, "__builtin_ia32_fnmsubnepbf16512_mask3", IX86_BUILTIN_FNMSUBNEPBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fnmsubnepbf16_v32bf_maskz, "__builtin_ia32_fnmsubnepbf16512_maskz", IX86_BUILTIN_FNMSUBNEPBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask, "__builtin_ia32_fnmsubnepbf16256_mask", IX86_BUILTIN_FNMSUBNEPBF16256_MASK, UNKNOWN, 
(int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_mask3, "__builtin_ia32_fnmsubnepbf16256_mask3", IX86_BUILTIN_FNMSUBNEPBF16256_MASK3, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v16bf_maskz, "__builtin_ia32_fnmsubnepbf16256_maskz", IX86_BUILTIN_FNMSUBNEPBF16256_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask, "__builtin_ia32_fnmsubnepbf16128_mask", IX86_BUILTIN_FNMSUBNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_mask3, "__builtin_ia32_fnmsubnepbf16128_mask3", IX86_BUILTIN_FNMSUBNEPBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubnepbf16_v8bf_maskz, "__builtin_ia32_fnmsubnepbf16128_maskz", IX86_BUILTIN_FNMSUBNEPBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 41d6eb8..f5fbc8e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -11267,6 +11267,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HI_FTYPE_V8SI_V8SI: case V64QI_FTYPE_V64QI_V64QI: case V32QI_FTYPE_V32QI_V32QI: + case V32BF_FTYPE_V32BF_V32BF: + case V16BF_FTYPE_V16BF_V16BF: + case V8BF_FTYPE_V8BF_V8BF: case V16HI_FTYPE_V32QI_V32QI: case V16HI_FTYPE_V16HI_V16HI: case V8SI_FTYPE_V4DF_V4DF: @@ -11434,6 +11437,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HI_FTYPE_V8HI_V16HI_UHI: case V16HI_FTYPE_HI_V16HI_UHI: case V8HI_FTYPE_V8HI_V8HI_UQI: + case V8BF_FTYPE_V8BF_V8BF_UQI: case V8HI_FTYPE_HI_V8HI_UQI: case V16HF_FTYPE_V16HF_V16HF_UHI: case V8SF_FTYPE_V8HI_V8SF_UQI: @@ -11531,9 +11535,11 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16HF_FTYPE_V16HF_V16HF_V16HF: case V16HI_FTYPE_V16HF_V16HI_UHI: case V16HI_FTYPE_V16HI_V16HI_UHI: + case V16BF_FTYPE_V16BF_V16BF_UHI: case V8HI_FTYPE_V16QI_V8HI_UQI: case V16HI_FTYPE_V16QI_V16HI_UHI: case V32HI_FTYPE_V32HI_V32HI_USI: + case V32BF_FTYPE_V32BF_V32BF_USI: case V32HI_FTYPE_V32QI_V32HI_USI: case V8DI_FTYPE_V16QI_V8DI_UQI: case V8DI_FTYPE_V2DI_V8DI_UQI: @@ -11663,6 +11669,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, break; case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: + case V32BF_FTYPE_V32BF_V32BF_V32BF_USI: case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: @@ -11693,6 +11700,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: + case V8BF_FTYPE_V8BF_V8BF_V8BF_UQI: case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: case V16HF_FTYPE_V16HF_V16HF_V16HF_UQI: @@ -11700,6 +11708,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: + case V16BF_FTYPE_V16BF_V16BF_V16BF_UHI: case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: diff --git a/gcc/config/i386/immintrin.h 
b/gcc/config/i386/immintrin.h index fea55a29..0253340 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -148,4 +148,8 @@ #include <avx10_2-512convertintrin.h> +#include <avx10_2bf16intrin.h> + +#include <avx10_2-512bf16intrin.h> + #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 622873b..dad7f61 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -229,6 +229,7 @@ UNSPEC_VCVTNEPH2HF8 UNSPEC_VCVTNEPH2HF8S UNSPEC_VCVTHF82PH + UNSPEC_VSCALEFPBF16 ]) (define_c_enum "unspecv" [ @@ -499,6 +500,9 @@ (define_mode_iterator VHF_AVX10_2 [(V32HF "TARGET_AVX10_2_512") V16HF V8HF]) +(define_mode_iterator VBF_AVX10_2 + [(V32BF "TARGET_AVX10_2_512") V16BF V8BF]) + ;; All vector integer modes (define_mode_iterator VI [(V16SI "TARGET_AVX512F && TARGET_EVEX512") @@ -31812,3 +31816,292 @@ "TARGET_AVX10_2_256" "vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}" [(set_attr "prefix" "evex")]) + +(define_insn "avx10_2_scalefpbf16_<mode><mask_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (unspec:VBF_AVX10_2 + [(match_operand:VBF_AVX10_2 1 "register_operand" "v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")] + UNSPEC_VSCALEFPBF16))] + "TARGET_AVX10_2_256" + "vscalefpbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "prefix" "evex")]) + +(define_insn "avx10_2_<code>pbf16_<mode><mask_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (smaxmin:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "register_operand" "v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + "TARGET_AVX10_2_256" + "v<maxmin_float>pbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx10_2_<insn>nepbf16_<mode><mask_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (plusminusmultdiv:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "register_operand" "v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + "TARGET_AVX10_2_256" + "v<insn>nepbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "prefix" "evex")]) + +(define_expand "avx10_2_fmaddnepbf16_<mode>_maskz" + [(match_operand:VBF_AVX10_2 0 "register_operand") + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX10_2_256" + { + emit_insn (gen_avx10_2_fmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX(<MODE>mode), + operands[4])); + DONE; + }) + +(define_insn "avx10_2_fmaddnepbf16_<mode><sd_maskz_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") + (fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + "TARGET_AVX10_2_256" + "@ + vfmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fmaddnepbf16_<mode>_mask" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") + (vec_merge:VBF_AVX10_2 + 
(fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] + "TARGET_AVX10_2_256" + "@ + vfmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fmaddnepbf16_<mode>_mask3" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + (match_operand:VBF_AVX10_2 3 "register_operand" "0")) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vfmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx10_2_fnmaddnepbf16_<mode>_maskz" + [(match_operand:VBF_AVX10_2 0 "register_operand") + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX10_2_256" + { + emit_insn (gen_avx10_2_fnmaddnepbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX(<MODE>mode), + operands[4])); + DONE; + }) + +(define_insn "avx10_2_fnmaddnepbf16_<mode><sd_maskz_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + "TARGET_AVX10_2_256" + "@ + vfnmadd132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfnmadd213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfnmadd231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] + "TARGET_AVX10_2_256" + "@ + vfnmadd132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmadd213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fnmaddnepbf16_<mode>_mask3" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + (match_operand:VBF_AVX10_2 3 "register_operand" "0")) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vfnmadd231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, 
%1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx10_2_fmsubnepbf16_<mode>_maskz" + [(match_operand:VBF_AVX10_2 0 "register_operand") + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX10_2_256" + { + emit_insn (gen_avx10_2_fmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX(<MODE>mode), + operands[4])); + DONE; + }) + +(define_insn "avx10_2_fmsubnepbf16_<mode><sd_maskz_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") + (fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + "TARGET_AVX10_2_256" + "@ + vfmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fmsubnepbf16_<mode>_mask" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] + "TARGET_AVX10_2_256" + "@ + vfmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fmsubnepbf16_<mode>_mask3" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vfmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_expand "avx10_2_fnmsubnepbf16_<mode>_maskz" + [(match_operand:VBF_AVX10_2 0 "register_operand") + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX10_2_256" + { + emit_insn (gen_avx10_2_fnmsubnepbf16_<mode>_maskz_1 (operands[0], operands[1], + operands[2], operands[3], + CONST0_RTX(<MODE>mode), + operands[4])); + DONE; + }) + +(define_insn "avx10_2_fnmsubnepbf16_<mode><sd_maskz_name>" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + 
"TARGET_AVX10_2_256" + "@ + vfnmsub132nepbf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} + vfnmsub213nepbf16\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3} + vfnmsub231nepbf16\t{%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + (match_dup 1) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] + "TARGET_AVX10_2_256" + "@ + vfnmsub132nepbf16\t{%2, %3, %0%{%4%}|%0%{%4%}, %3, %2} + vfnmsub213nepbf16\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "avx10_2_fnmsubnepbf16_<mode>_mask3" + [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") + (vec_merge:VBF_AVX10_2 + (fma:VBF_AVX10_2 + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) + (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + (neg:VBF_AVX10_2 + (match_operand:VBF_AVX10_2 3 "register_operand" "0"))) + (match_dup 3) + (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] + "TARGET_AVX10_2_256" + "vfnmsub231nepbf16\t{%2, %1, %0%{%4%}|%0%{%4%}, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssemuladd") + (set_attr "mode" "<sseinsnmode>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx10-helper.h b/gcc/testsuite/gcc.target/i386/avx10-helper.h index 385c744..9ff1dd72 100644 --- a/gcc/testsuite/gcc.target/i386/avx10-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx10-helper.h @@ -3,9 +3,55 @@ #define AVX10 #define AVX512FP16 - +#define AVX512BF16 #include "avx512f-helper.h" #include "avx512f-mask-type.h" +#include <stdint.h> + +#define NOINLINE __attribute__((noinline,noclone)) +typedef union +{ + uint32_t int32; + float flt; +}float_int_t; + +float NOINLINE +convert_bf16_to_fp32 (unsigned short bf16) +{ + unsigned int ii = bf16 << 16; + return *(float*)ⅈ +} + +unsigned short NOINLINE +convert_fp32_to_bf16 (float fp) +{ + float_int_t fi; + fi.flt = fp; + return ((fi.int32 >> 16) & 0xffff); +} + +unsigned short NOINLINE +convert_fp32_to_bf16_ne (float fp) +{ + float_int_t fi; + uint32_t rounding_bias, lsb; + + fi.flt = fp; + lsb = (fi.int32 >> 16) & 0x1; + rounding_bias = 0x7fff + lsb; + fi.int32 += rounding_bias; + + return ((fi.int32 >> 16) & 0xffff); +} + +float NOINLINE +scalef (float x, float y) +{ + __m128 px = _mm_load_ss (&x); + __m128 py = _mm_load_ss (&y); + __m128 out = _mm_scalef_ss (px, py); + return _mm_cvtss_f32 (out); +} #endif /* AVX10_HELPER_INCLUDED */ diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c new file mode 100644 index 0000000..78839fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2-512 -O2" } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m512bh res, x1, x2; +volatile __mmask32 m32; + +void extern +avx10_2_512_test (void) +{ + res = _mm512_addne_pbh (x1, x2); + res = _mm512_mask_addne_pbh (res, m32, x1, x2); + res = _mm512_maskz_addne_pbh (m32, x1, x2); + res = _mm512_subne_pbh (x1, x2); + res = _mm512_mask_subne_pbh (res, m32, x1, x2); + res = _mm512_maskz_subne_pbh (m32, x1, x2); + res = _mm512_mulne_pbh (x1, x2); + res = _mm512_mask_mulne_pbh (res, m32, x1, x2); + res = _mm512_maskz_mulne_pbh (m32, x1, x2); + res = _mm512_divne_pbh (x1, x2); + res = _mm512_mask_divne_pbh (res, m32, x1, x2); + res = _mm512_maskz_divne_pbh (m32, x1, x2); + res = _mm512_max_pbh (x1, x2); + res = _mm512_mask_max_pbh (res, m32, x1, x2); + res = _mm512_maskz_max_pbh (m32, x1, x2); + res = _mm512_min_pbh (x1, x2); + res = _mm512_mask_min_pbh (res, m32, x1, x2); + res = _mm512_maskz_min_pbh (m32, x1, x2); + res = _mm512_scalef_pbh (x1, x2); + res = _mm512_mask_scalef_pbh (res, m32, x1, x2); + res = _mm512_maskz_scalef_pbh (m32, x1, x2); + + res = _mm512_fmaddne_pbh (res, x1, x2); + res = _mm512_mask_fmaddne_pbh (res, m32, x1, x2); + res = _mm512_mask3_fmaddne_pbh (res, x1, x2, m32); + res = _mm512_maskz_fmaddne_pbh (m32,res, x1, x2); + res = _mm512_fmsubne_pbh (res, x1, x2); + res = _mm512_mask_fmsubne_pbh (res, m32, x1, x2); + res = _mm512_mask3_fmsubne_pbh (res, x1, x2, m32); + res = _mm512_maskz_fmsubne_pbh (m32,res, x1, x2); + res = _mm512_fnmaddne_pbh (res, x1, x2); + res = _mm512_mask_fnmaddne_pbh (res, m32, x1, x2); + res = _mm512_mask3_fnmaddne_pbh (res, x1, x2, m32); + res = _mm512_maskz_fnmaddne_pbh (m32,res, x1, x2); + res = _mm512_fnmsubne_pbh (res, x1, x2); + res = _mm512_mask_fnmsubne_pbh (res, m32, 
x1, x2); + res = _mm512_mask3_fnmsubne_pbh (res, x1, x2, m32); + res = _mm512_maskz_fnmsubne_pbh (m32,res, x1, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c new file mode 100644 index 0000000..3b7d163 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vaddnepbf16-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) (2 * (i % 7) + 7); + float y = (float) (3 * (i % 7) - 5); + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + res = x + y; + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); + } + + res1.x = INTRINSIC (_addne_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_addne_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_addne_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c new file mode 100644 index 0000000..ca90828 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vdivnepbf16-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) (2 * (i % 7) + 7); + float y = (float) (3 * (i % 7) - 5); + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + res = x / y; + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); + } + + res1.x = INTRINSIC (_divne_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_divne_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_divne_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c new file mode 100644 index 0000000..b19c9d4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmaddXXXnepbf16-2.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + float x = 0.5; + float y = 2; + float z = 0.25; + src1.a[i] = convert_fp32_to_bf16 (x); + src2.a[i] = convert_fp32_to_bf16 (y); + res1.a[i] = convert_fp32_to_bf16 (z); + res2.a[i] = res1.a[i]; + float x16, y16, z16, m1, m2; + x16 = convert_bf16_to_fp32 (src1.a[i]); + y16 = convert_bf16_to_fp32 (src2.a[i]); + z16 = convert_bf16_to_fp32 (res1.a[i]); + m1 = y16 + x16 * z16; + m2 = z16 + x16 * y16; + res_ref[i] = convert_fp32_to_bf16 (m1); + res_ref2[i] = convert_fp32_to_bf16 (m2); + } + + MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES); + MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES); + res1.x = INTRINSIC (_mask_fmaddne_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fmaddne_pbh) (src1.x, src2.x, res2.x, mask); + + MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c new file mode 100644 index 0000000..86adbc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfmsubXXXnepbf16-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" + +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + float x = 0.5; + float y = 2; + float z = 0.25; + src1.a[i] = convert_fp32_to_bf16 (x); + src2.a[i] = convert_fp32_to_bf16 (y); + res1.a[i] = convert_fp32_to_bf16 (z); + res2.a[i] = res1.a[i]; + float x16, y16, z16, m1, m2; + x16 = convert_bf16_to_fp32 (src1.a[i]); + y16 = convert_bf16_to_fp32 (src2.a[i]); + z16 = convert_bf16_to_fp32 (res1.a[i]); + m1 = -y16 + x16 * z16; + m2 = -z16 + x16 * y16; + res_ref[i] = convert_fp32_to_bf16 (m1); + res_ref2[i] = convert_fp32_to_bf16 (m2); + } + + MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES); + MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES); + res1.x = INTRINSIC (_mask_fmsubne_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fmsubne_pbh) (src1.x, src2.x, res2.x, mask); + + MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c new file mode 100644 index 0000000..3a7d4cf --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmaddXXXnepbf16-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" + +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + float x = 0.5; + float y = 2; + float z = 0.25; + src1.a[i] = convert_fp32_to_bf16 (x); + src2.a[i] = convert_fp32_to_bf16 (y); + res1.a[i] = convert_fp32_to_bf16 (z); + res2.a[i] = res1.a[i]; + float x16, y16, z16, m1, m2; + x16 = convert_bf16_to_fp32 (src1.a[i]); + y16 = convert_bf16_to_fp32 (src2.a[i]); + z16 = convert_bf16_to_fp32 (res1.a[i]); + m1 = y16 - x16 * z16; + m2 = z16 - x16 * y16; + res_ref[i] = convert_fp32_to_bf16 (m1); + res_ref2[i] = convert_fp32_to_bf16 (m2); + } + + MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES); + MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES); + res1.x = INTRINSIC (_mask_fnmaddne_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fnmaddne_pbh) (src1.x, src2.x, res2.x, mask); + + MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c new file mode 100644 index 0000000..943146e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vfnmsubXXXnepbf16-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" + +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + float x = 0.5; + float y = 2; + float z = 0.25; + src1.a[i] = convert_fp32_to_bf16 (x); + src2.a[i] = convert_fp32_to_bf16 (y); + res1.a[i] = convert_fp32_to_bf16 (z); + res2.a[i] = res1.a[i]; + float x16, y16, z16, m1, m2; + x16 = convert_bf16_to_fp32 (src1.a[i]); + y16 = convert_bf16_to_fp32 (src2.a[i]); + z16 = convert_bf16_to_fp32 (res1.a[i]); + m1 = -y16 - x16 * z16; + m2 = -z16 - x16 * y16; + res_ref[i] = convert_fp32_to_bf16 (m1); + res_ref2[i] = convert_fp32_to_bf16 (m2); + } + + MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES); + MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES); + res1.x = INTRINSIC (_mask_fnmsubne_pbh) (res1.x, mask, src1.x, src2.x); + res2.x = INTRINSIC (_mask3_fnmsubne_pbh) (src1.x, src2.x, res2.x, mask); + + MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c new file mode 100644 index 0000000..a563b1e --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = 0.5; + float y = 0.25; + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + if (x > y) + res_ref[i] = res_ref2[i] = src1.a[i]; + else + res_ref[i] = res_ref2[i] = src2.a[i]; + } + + res1.x = INTRINSIC (_max_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_max_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_max_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c new file mode 100644 index 0000000..10f13d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = 0.5; + float y = 0.25; + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + if (x < y) + res_ref[i] = res_ref2[i] = src1.a[i]; + else + res_ref[i] = res_ref2[i] = src2.a[i]; + } + + res1.x = INTRINSIC (_min_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_min_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_min_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c new file mode 100644 index 0000000..ce16807 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmulnepbf16-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, 
res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) (2 * (i % 7) + 7); + float y = (float) (3 * (i % 7) - 5); + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + res = x * y; + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); + } + + res1.x = INTRINSIC (_mulne_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_mulne_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_mulne_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c new file mode 100644 index 0000000..78df474 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) (2 * (i % 7) + 7); + float y = 1.0 + (float) (4 * i) / (float) SIZE_RES; + float xx, yy, res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + xx = convert_bf16_to_fp32 (src1.a[i]); + yy = convert_bf16_to_fp32 (src2.a[i]); + res = scalef (xx, yy); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (res); + } + + res1.x = INTRINSIC (_scalef_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_scalef_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_scalef_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c new file mode 100644 index 0000000..f8a9a51 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsubnepbf16-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE_RES (AVX512F_LEN / 16) + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES]; + + for (i = 0; i < SIZE_RES; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) 
(2 * (i % 7) + 7); + float y = (float) (3 * (i % 7) - 5); + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + res = x - y; + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); + } + + res1.x = INTRINSIC (_subne_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_subne_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_subne_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c new file mode 100644 index 0000000..831c8f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c @@ -0,0 +1,172 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaddnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsubnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ 
\\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmulnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vdivnepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmadd132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub231nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub132nepbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> + +volatile __m256bh res, x1, x2; +volatile __m128bh res1, x3, x4; +volatile __mmask16 m16; +volatile __mmask8 m8; + +void extern +avx10_2_test (void) +{ + res = _mm256_addne_pbh (x1, x2); + res = _mm256_mask_addne_pbh (res, m16, x1, x2); + res = _mm256_maskz_addne_pbh (m16, x1, x2); + res1 = _mm_addne_pbh (x3, x4); + res1 = _mm_mask_addne_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_addne_pbh (m8, x3, x4); + + res = _mm256_subne_pbh (x1, x2); + res = _mm256_mask_subne_pbh (res, m16, x1, x2); + res = _mm256_maskz_subne_pbh (m16, x1, x2); + res1 = _mm_subne_pbh (x3, x4); + res1 = _mm_mask_subne_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_subne_pbh (m8, x3, x4); + + res = _mm256_mulne_pbh (x1, x2); + res = _mm256_mask_mulne_pbh (res, m16, x1, x2); + res = _mm256_maskz_mulne_pbh (m16, x1, x2); + res1 = _mm_mulne_pbh (x3, x4); + res1 = _mm_mask_mulne_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_mulne_pbh (m8, x3, x4); + + res = _mm256_divne_pbh (x1, x2); + res = _mm256_mask_divne_pbh (res, m16, x1, x2); + res = _mm256_maskz_divne_pbh (m16, x1, x2); + res1 = _mm_divne_pbh (x3, x4); + res1 = _mm_mask_divne_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_divne_pbh (m8, x3, x4); + + res = _mm256_max_pbh (x1, x2); + res = _mm256_mask_max_pbh (res, m16, x1, x2); + res = _mm256_maskz_max_pbh (m16, x1, x2); + res1 = _mm_max_pbh (x3, x4); + res1 = _mm_mask_max_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_max_pbh (m8, x3, x4); + + res = _mm256_min_pbh (x1, x2); + res = _mm256_mask_min_pbh (res, m16, x1, x2); + res = _mm256_maskz_min_pbh (m16, x1, x2); + res1 = _mm_min_pbh (x3, x4); + res1 = _mm_mask_min_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_min_pbh (m8, x3, x4); + + res = _mm256_scalef_pbh 
(x1, x2); + res = _mm256_mask_scalef_pbh (res, m16, x1, x2); + res = _mm256_maskz_scalef_pbh (m16, x1, x2); + res1 = _mm_scalef_pbh (x3, x4); + res1 = _mm_mask_scalef_pbh (res1, m8, x3, x4); + res1 = _mm_maskz_scalef_pbh (m8, x3, x4); + + res = _mm256_fmaddne_pbh (res, x1, x2); + res = _mm256_mask_fmaddne_pbh (res, m16, x1, x2); + res = _mm256_mask3_fmaddne_pbh (res, x1, x2, m16); + res = _mm256_maskz_fmaddne_pbh (m16,res, x1, x2); + res1 = _mm_fmaddne_pbh (res1, x3, x4); + res1 = _mm_mask_fmaddne_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fmaddne_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fmaddne_pbh (m8,res1, x3, x4); + + res = _mm256_fmsubne_pbh (res, x1, x2); + res = _mm256_mask_fmsubne_pbh (res, m16, x1, x2); + res = _mm256_mask3_fmsubne_pbh (res, x1, x2, m16); + res = _mm256_maskz_fmsubne_pbh (m16,res, x1, x2); + res1 = _mm_fmsubne_pbh (res1, x3, x4); + res1 = _mm_mask_fmsubne_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fmsubne_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fmsubne_pbh (m8,res1, x3, x4); + + res = _mm256_fnmaddne_pbh (res, x1, x2); + res = _mm256_mask_fnmaddne_pbh (res, m16, x1, x2); + res = _mm256_mask3_fnmaddne_pbh (res, x1, x2, m16); + res = _mm256_maskz_fnmaddne_pbh (m16,res, x1, x2); + res1 = _mm_fnmaddne_pbh (res1, x3, x4); + res1 = _mm_mask_fnmaddne_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fnmaddne_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fnmaddne_pbh (m8,res1, x3, x4); + + res = _mm256_fnmsubne_pbh (res, x1, x2); + res = _mm256_mask_fnmsubne_pbh (res, m16, x1, x2); + res = _mm256_mask3_fnmsubne_pbh (res, x1, x2, m16); + res = _mm256_maskz_fnmsubne_pbh (m16,res, x1, x2); + res1 = _mm_fnmsubne_pbh (res1, x3, x4); + res1 = _mm_mask_fnmsubne_pbh (res1, m8, x3, x4); + res1 = _mm_mask3_fnmsubne_pbh (res1, x3, x4, m8); + res1 = _mm_maskz_fnmsubne_pbh (m8,res1, x3, x4); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c new file mode 100644 index 0000000..7783dce --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vaddnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vaddnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vaddnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c new file mode 100644 index 0000000..dd2c544 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vdivnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vdivnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vdivnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c new file mode 100644 index 0000000..a4f2e5f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmaddXXXnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 
+#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfmaddXXXnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfmaddXXXnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c new file mode 100644 index 0000000..406c173 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfmsubXXXnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfmsubXXXnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfmsubXXXnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c new file mode 100644 index 0000000..3f53099 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmaddXXXnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfnmaddXXXnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfnmaddXXXnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c new file mode 100644 index 0000000..fc906cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vfnmsubXXXnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfnmsubXXXnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vfnmsubXXXnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c new file mode 100644 index 0000000..2b8f820 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmaxpbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmaxpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c new file mode 100644 index 0000000..dcb7c0e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vminpbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vminpbf16-2.c" diff --git 
a/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c new file mode 100644 index 0000000..753e2d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmulnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmulnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmulnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c new file mode 100644 index 0000000..8f26dfb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vscalefpbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vscalefpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c new file mode 100644 index 0000000..ad02ee1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vsubnepbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vsubnepbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vsubnepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h index 3cd6751..b61c03b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h @@ -45,6 +45,7 @@ MAKE_MASK_MERGE(, float) MAKE_MASK_MERGE(d, double) MAKE_MASK_MERGE(i_ub, unsigned char) MAKE_MASK_MERGE(i_uw, unsigned short) +MAKE_MASK_MERGE(bf16_uw, unsigned short) MAKE_MASK_MERGE(i_ud, unsigned int) MAKE_MASK_MERGE(i_uq, unsigned long long) @@ -70,6 +71,7 @@ MAKE_MASK_ZERO(, float) MAKE_MASK_ZERO(d, double) MAKE_MASK_ZERO(i_ub, unsigned char) MAKE_MASK_ZERO(i_uw, unsigned short) +MAKE_MASK_ZERO(bf16_uw, unsigned short) MAKE_MASK_ZERO(i_ud, unsigned int) MAKE_MASK_ZERO(i_uq, unsigned long long) diff --git a/gcc/testsuite/gcc.target/i386/m512-check.h b/gcc/testsuite/gcc.target/i386/m512-check.h index d5d1837..bdc682d 100644 --- a/gcc/testsuite/gcc.target/i386/m512-check.h +++ b/gcc/testsuite/gcc.target/i386/m512-check.h @@ -69,6 +69,12 @@ typedef union typedef union { + __m512bh x; + unsigned short a[32]; +} union512bf16_uw; + +typedef union +{ __m128h x; _Float16 a[8]; } union128h; @@ -79,6 +85,18 @@ typedef union _Float16 a[16]; } union256h; +typedef union +{ + __m128bh x; + unsigned short a[8]; +} union128bf16_uw; + +typedef union +{ + __m256bh x; + unsigned short a[16]; +} union256bf16_uw; + #define CHECK_ROUGH_EXP(UNION_TYPE, VALUE_TYPE, FMT) \ static int \ __attribute__((noinline, unused)) \ @@ -155,3 +173,12 @@ CHECK_FP_EXP (union256h, _Float16, ESP_FLOAT16, "%f") CHECK_ROUGH_EXP (union128h, _Float16, "%f") CHECK_ROUGH_EXP (union256h, _Float16, 
"%f") #endif + +#if defined(AVX512BF16) +CHECK_EXP (union512bf16_uw, unsigned short, "%d") +#endif + +#if defined(AVX512BF16) +CHECK_EXP (union128bf16_uw, unsigned short, "%d") +CHECK_EXP (union256bf16_uw, unsigned short, "%d") +#endif |