diff options
author | liuhongt <hongtao.liu@intel.com> | 2019-03-21 11:25:58 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-09-18 15:00:11 +0800 |
commit | ede1820d2148b9bc41de2a22485b7608c9521eb2 (patch) | |
tree | 6b24e8ae809533708127bdc82a9344b5be0298f8 /gcc | |
parent | b6c24eab08d9649b4229b4b0396ce91f97f88dfb (diff) | |
download | gcc-ede1820d2148b9bc41de2a22485b7608c9521eb2.zip gcc-ede1820d2148b9bc41de2a22485b7608c9521eb2.tar.gz gcc-ede1820d2148b9bc41de2a22485b7608c9521eb2.tar.bz2 |
AVX512FP16: Add FP16 fma instructions.
Add vfmadd[132,213,231]ph/vfnmadd[132,213,231]ph/vfmsub[132,213,231]ph/
vfnmsub[132,213,231]ph.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h (_mm512_mask_fmadd_ph):
New intrinsic.
(_mm512_mask3_fmadd_ph): Likewise.
(_mm512_maskz_fmadd_ph): Likewise.
(_mm512_fmadd_round_ph): Likewise.
(_mm512_mask_fmadd_round_ph): Likewise.
(_mm512_mask3_fmadd_round_ph): Likewise.
(_mm512_maskz_fmadd_round_ph): Likewise.
(_mm512_fnmadd_ph): Likewise.
(_mm512_mask_fnmadd_ph): Likewise.
(_mm512_mask3_fnmadd_ph): Likewise.
(_mm512_maskz_fnmadd_ph): Likewise.
(_mm512_fnmadd_round_ph): Likewise.
(_mm512_mask_fnmadd_round_ph): Likewise.
(_mm512_mask3_fnmadd_round_ph): Likewise.
(_mm512_maskz_fnmadd_round_ph): Likewise.
(_mm512_fmsub_ph): Likewise.
(_mm512_mask_fmsub_ph): Likewise.
(_mm512_mask3_fmsub_ph): Likewise.
(_mm512_maskz_fmsub_ph): Likewise.
(_mm512_fmsub_round_ph): Likewise.
(_mm512_mask_fmsub_round_ph): Likewise.
(_mm512_mask3_fmsub_round_ph): Likewise.
(_mm512_maskz_fmsub_round_ph): Likewise.
(_mm512_fnmsub_ph): Likewise.
(_mm512_mask_fnmsub_ph): Likewise.
(_mm512_mask3_fnmsub_ph): Likewise.
(_mm512_maskz_fnmsub_ph): Likewise.
(_mm512_fnmsub_round_ph): Likewise.
(_mm512_mask_fnmsub_round_ph): Likewise.
(_mm512_mask3_fnmsub_round_ph): Likewise.
(_mm512_maskz_fnmsub_round_ph): Likewise.
* config/i386/avx512fp16vlintrin.h (_mm256_fmadd_ph):
New intrinsic.
(_mm256_mask_fmadd_ph): Likewise.
(_mm256_mask3_fmadd_ph): Likewise.
(_mm256_maskz_fmadd_ph): Likewise.
(_mm_fmadd_ph): Likewise.
(_mm_mask_fmadd_ph): Likewise.
(_mm_mask3_fmadd_ph): Likewise.
(_mm_maskz_fmadd_ph): Likewise.
(_mm256_fnmadd_ph): Likewise.
(_mm256_mask_fnmadd_ph): Likewise.
(_mm256_mask3_fnmadd_ph): Likewise.
(_mm256_maskz_fnmadd_ph): Likewise.
(_mm_fnmadd_ph): Likewise.
(_mm_mask_fnmadd_ph): Likewise.
(_mm_mask3_fnmadd_ph): Likewise.
(_mm_maskz_fnmadd_ph): Likewise.
(_mm256_fmsub_ph): Likewise.
(_mm256_mask_fmsub_ph): Likewise.
(_mm256_mask3_fmsub_ph): Likewise.
(_mm256_maskz_fmsub_ph): Likewise.
(_mm_fmsub_ph): Likewise.
(_mm_mask_fmsub_ph): Likewise.
(_mm_mask3_fmsub_ph): Likewise.
(_mm_maskz_fmsub_ph): Likewise.
(_mm256_fnmsub_ph): Likewise.
(_mm256_mask_fnmsub_ph): Likewise.
(_mm256_mask3_fnmsub_ph): Likewise.
(_mm256_maskz_fnmsub_ph): Likewise.
(_mm_fnmsub_ph): Likewise.
(_mm_mask_fnmsub_ph): Likewise.
(_mm_mask3_fnmsub_ph): Likewise.
(_mm_maskz_fnmsub_ph): Likewise.
* config/i386/i386-builtin.def: Add corresponding new builtins.
* config/i386/sse.md
(<avx512>_fmadd_<mode>_maskz<round_expand_name>): Adjust to
support HF vector modes.
(<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>):
Ditto.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1): Ditto.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2): Ditto.
(*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3): Ditto.
(<avx512>_fmadd_<mode>_mask<round_name>): Ditto.
(<avx512>_fmadd_<mode>_mask3<round_name>): Ditto.
(<avx512>_fmsub_<mode>_maskz<round_expand_name>): Ditto.
(<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>):
Ditto.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1): Ditto.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2): Ditto.
(*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3): Ditto.
(<avx512>_fmsub_<mode>_mask<round_name>): Ditto.
(<avx512>_fmsub_<mode>_mask3<round_name>): Ditto.
(<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>):
Ditto.
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1): Ditto.
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2): Ditto.
(*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3): Ditto.
(<avx512>_fnmadd_<mode>_mask<round_name>): Ditto.
(<avx512>_fnmadd_<mode>_mask3<round_name>): Ditto.
(<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Ditto.
(<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>):
Ditto.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1): Ditto.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2): Ditto.
(*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3): Ditto.
(<avx512>_fnmsub_<mode>_mask<round_name>): Ditto.
(<avx512>_fnmsub_<mode>_mask3<round_name>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx-1.c: Add test for new builtins.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/sse-14.c: Add test fot new intrinsics.
* gcc.target/i386/sse-22.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/avx512fp16intrin.h | 432 | ||||
-rw-r--r-- | gcc/config/i386/avx512fp16vlintrin.h | 364 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 36 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 192 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-1.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-13.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-14.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-22.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse-23.c | 12 |
9 files changed, 996 insertions, 96 deletions
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index 16c6b1a..9ccbc46 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -5271,6 +5271,438 @@ _mm512_maskz_fmsubadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, #endif /* __OPTIMIZE__ */ +/* Intrinsics vfmadd[132,213,231]ph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_fmadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fmadd_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfmaddph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmaddph512_maskz ((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfnmadd[132,213,231]ph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmaddph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fnmadd_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmaddph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfnmaddph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmaddph512_maskz ((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfmsub[132,213,231]ph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fmsub_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfmsubph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfmsubph512_maskz ((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + +/* Intrinsics vfnmsub[132,213,231]ph. */ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_ph (__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_ph (__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_ph (__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) +{ + return (__m512h) + __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_round_ph (__m512h __A, __mmask32 __U, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_round_ph (__m512h __A, __m512h __B, __m512h __C, + __mmask32 __U, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_mask3 ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + __m512h __C, const int __R) +{ + return (__m512h) __builtin_ia32_vfnmsubph512_maskz ((__v32hf) __A, + (__v32hf) __B, + (__v32hf) __C, + (__mmask32) __U, __R); +} + +#else +#define _mm512_fnmsub_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), -1, (R))) + +#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmsubph512_mask ((A), (B), (C), (U), (R))) + +#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \ + ((__m512h)__builtin_ia32_vfnmsubph512_mask3 ((A), (B), (C), (U), (R))) + +#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vfnmsubph512_maskz ((A), (B), (C), (U), (R))) + +#endif /* __OPTIMIZE__ */ + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index ef09790..1292c02 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -2451,6 +2451,370 @@ _mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, __U); } +/* Intrinsics vfmadd[132,213,231]ph. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +/* Intrinsics vfnmadd[132,213,231]ph. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +/* Intrinsics vfmsub[132,213,231]ph. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +/* Intrinsics vfnmsub[132,213,231]ph. */ +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) -1); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C, + __mmask16 __U) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m256h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, + __m256h __C) +{ + return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A, + (__v16hf) __B, + (__v16hf) __C, + (__mmask16) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) -1); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C, + __mmask8 __U) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, + __m128h __C) +{ + return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A, + (__v8hf) __B, + (__v8hf) __C, + (__mmask8) + __U); +} + #ifdef __DISABLE_AVX512FP16VL__ #undef __DISABLE_AVX512FP16VL__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index eec6148..56d23b0 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2887,6 +2887,30 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask, "__builtin_ia32_vfmsubaddph128_mask", IX86_BUILTIN_VFMSUBADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_mask3, "__builtin_ia32_vfmsubaddph128_mask3", IX86_BUILTIN_VFMSUBADDPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsubadd_v8hf_maskz, "__builtin_ia32_vfmsubaddph128_maskz", IX86_BUILTIN_VFMSUBADDPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmadd_v16hf_mask, "__builtin_ia32_vfmaddph256_mask", IX86_BUILTIN_VFMADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmadd_v16hf_mask3, "__builtin_ia32_vfmaddph256_mask3", IX86_BUILTIN_VFMADDPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmadd_v16hf_maskz, "__builtin_ia32_vfmaddph256_maskz", IX86_BUILTIN_VFMADDPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmadd_v8hf_mask, "__builtin_ia32_vfmaddph128_mask", IX86_BUILTIN_VFMADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmadd_v8hf_mask3, "__builtin_ia32_vfmaddph128_mask3", IX86_BUILTIN_VFMADDPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmadd_v8hf_maskz, "__builtin_ia32_vfmaddph128_maskz", IX86_BUILTIN_VFMADDPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmadd_v16hf_mask, "__builtin_ia32_vfnmaddph256_mask", IX86_BUILTIN_VFNMADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmadd_v16hf_mask3, "__builtin_ia32_vfnmaddph256_mask3", IX86_BUILTIN_VFNMADDPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmadd_v16hf_maskz, "__builtin_ia32_vfnmaddph256_maskz", IX86_BUILTIN_VFNMADDPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmadd_v8hf_mask, "__builtin_ia32_vfnmaddph128_mask", IX86_BUILTIN_VFNMADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmadd_v8hf_mask3, "__builtin_ia32_vfnmaddph128_mask3", IX86_BUILTIN_VFNMADDPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmadd_v8hf_maskz, "__builtin_ia32_vfnmaddph128_maskz", IX86_BUILTIN_VFNMADDPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsub_v16hf_mask, "__builtin_ia32_vfmsubph256_mask", IX86_BUILTIN_VFMSUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsub_v16hf_mask3, "__builtin_ia32_vfmsubph256_mask3", IX86_BUILTIN_VFMSUBPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fmsub_v16hf_maskz, "__builtin_ia32_vfmsubph256_maskz", IX86_BUILTIN_VFMSUBPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsub_v8hf_mask, "__builtin_ia32_vfmsubph128_mask", IX86_BUILTIN_VFMSUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsub_v8hf_mask3, "__builtin_ia32_vfmsubph128_mask3", IX86_BUILTIN_VFMSUBPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fmsub_v8hf_maskz, "__builtin_ia32_vfmsubph128_maskz", IX86_BUILTIN_VFMSUBPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmsub_v16hf_mask, "__builtin_ia32_vfnmsubph256_mask", IX86_BUILTIN_VFNMSUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmsub_v16hf_mask3, "__builtin_ia32_vfnmsubph256_mask3", IX86_BUILTIN_VFNMSUBPH256_MASK3, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_fnmsub_v16hf_maskz, "__builtin_ia32_vfnmsubph256_maskz", IX86_BUILTIN_VFNMSUBPH256_MASKZ, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmsub_v8hf_mask, "__builtin_ia32_vfnmsubph128_mask", IX86_BUILTIN_VFNMSUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmsub_v8hf_mask3, "__builtin_ia32_vfnmsubph128_mask3", IX86_BUILTIN_VFNMSUBPH128_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fnmsub_v8hf_maskz, "__builtin_ia32_vfnmsubph128_maskz", IX86_BUILTIN_VFNMSUBPH128_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) @@ -3158,6 +3182,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_ro BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5553355..baf4642 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4689,10 +4689,10 @@ (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))]) (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>" - [(match_operand:VF_AVX512VL 0 "register_operand") - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + [(match_operand:VFH_AVX512VL 0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F && <round_mode512bit_condition>" { @@ -4732,11 +4732,11 @@ DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (fma:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") - (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") - (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (fma:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") + (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4769,12 +4769,12 @@ }) (define_insn "<avx512>_fmadd_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "register_operand" "0,0") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "register_operand" "0,0") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" @@ -4785,12 +4785,12 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (match_operand:VF_AVX512VL 3 "register_operand" "0")) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (match_operand:VFH_AVX512VL 3 "register_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" @@ -4817,10 +4817,10 @@ (set_attr "mode" "<MODE>")]) (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>" - [(match_operand:VF_AVX512VL 0 "register_operand") - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + [(match_operand:VFH_AVX512VL 0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F && <round_mode512bit_condition>" { @@ -4831,12 +4831,12 @@ }) (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (fma:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") - (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") - (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (fma:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") + (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (neg:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4870,13 +4870,13 @@ }) (define_insn "<avx512>_fmsub_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "register_operand" "0,0") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "register_operand" "0,0") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX512F" @@ -4887,13 +4887,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v") - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "register_operand" "0"))) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v") + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "register_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" @@ -4920,10 +4920,10 @@ (set_attr "mode" "<MODE>")]) (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>" - [(match_operand:VF_AVX512VL 0 "register_operand") - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + [(match_operand:VFH_AVX512VL 0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F && <round_mode512bit_condition>" { @@ -4934,12 +4934,12 @@ }) (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (fma:VF_SF_AVX512VL - (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) - (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") - (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (fma:VFH_SF_AVX512VL + (neg:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) + (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4973,13 +4973,13 @@ }) (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")) + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" @@ -4990,13 +4990,13 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")) - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (match_operand:VF_AVX512VL 3 "register_operand" "0")) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")) + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (match_operand:VFH_AVX512VL 3 "register_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" @@ -5024,10 +5024,10 @@ (set_attr "mode" "<MODE>")]) (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>" - [(match_operand:VF_AVX512VL 0 "register_operand") - (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") - (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + [(match_operand:VFH_AVX512VL 0 "register_operand") + (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F && <round_mode512bit_condition>" { @@ -5038,13 +5038,13 @@ }) (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" - [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") - (fma:VF_SF_AVX512VL - (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) - (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") - (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))] + [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") + (fma:VFH_SF_AVX512VL + (neg:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) + (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (neg:VFH_SF_AVX512VL + (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -5079,14 +5079,14 @@ }) (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")) + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX512F && <round_mode512bit_condition>" @@ -5097,14 +5097,14 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") - (vec_merge:VF_AVX512VL - (fma:VF_AVX512VL - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")) - (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") - (neg:VF_AVX512VL - (match_operand:VF_AVX512VL 3 "register_operand" "0"))) + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (vec_merge:VFH_AVX512VL + (fma:VFH_AVX512VL + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")) + (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>") + (neg:VFH_AVX512VL + (match_operand:VFH_AVX512VL 3 "register_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX512F" diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 5f474ca..8e2428a 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -763,6 +763,18 @@ #define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 876fe3c..7f4b2a3 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -780,6 +780,18 @@ #define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 48895e0..9151e50 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -838,6 +838,10 @@ test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3 (_mm512_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) @@ -876,6 +880,18 @@ test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __ test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fmadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fnmadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fnmadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fnmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fnmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index bc530da..892b633 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -941,6 +941,10 @@ test_3 (_mm_maskz_cvt_roundss_sh, __m128h, __mmask8, __m128h, __m128, 8) test_3 (_mm_maskz_cvt_roundsd_sh, __m128h, __mmask8, __m128h, __m128d, 8) test_3 (_mm512_fmaddsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3 (_mm512_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) +test_3 (_mm512_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, 9) test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8) test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8) test_3x (_mm512_mask_reduce_round_ph, __m512h, __m512h, __mmask32, __m512h, 123, 8) @@ -978,6 +982,18 @@ test_4 (_mm512_maskz_fmaddsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __ test_4 (_mm512_mask3_fmsubadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) test_4 (_mm512_mask_fmsubadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) test_4 (_mm512_maskz_fmsubadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fmadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fnmadd_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fnmadd_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fnmadd_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) +test_4 (_mm512_mask_fnmsub_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 9) +test_4 (_mm512_mask3_fnmsub_round_ph, __m512h, __m512h, __m512h, __m512h, __mmask32, 9) +test_4 (_mm512_maskz_fnmsub_round_ph, __m512h, __mmask32, __m512h, __m512h, __m512h, 9) test_4x (_mm_mask_reduce_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_roundscale_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 123, 8) test_4x (_mm_mask_getmant_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 1, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 072e4ea..2eb5a64 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -781,6 +781,18 @@ #define __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_mask3(A, B, C, D, 8) #define __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmaddph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmaddph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfmsubph512_maskz(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, E) __builtin_ia32_vfnmsubph512_mask3(A, B, C, D, 8) +#define __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, E) __builtin_ia32_vfnmsubph512_maskz(A, B, C, D, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_cmpph128_mask(A, B, C, D) __builtin_ia32_cmpph128_mask(A, B, 1, D) |