From 7f59b88279963cd05d2c2620a03d8ddc9b7a2775 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 23 Jan 2025 09:51:59 +0800 Subject: i386: Change mnemonics from V[CMP,MAX,MIN]PBF16 to V[CMP,MAX,MIN]BF16 gcc/ChangeLog: PR target/118270 * config/i386/avx10_2-512bf16intrin.h: Change intrin and builtin name according to new mnemonics. * config/i386/avx10_2bf16intrin.h: Ditto. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/sse.md (avx10_2_pbf16_): Rename to... (avx10_2_bf16_): ...this. Change instruction name output. (avx10_2_cmppbf16_): Rename to... (avx10_2_cmpbf16_): ...this. Change instruction name output. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-512-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c: Move to... * gcc.target/i386/avx10_2-512-bf16-vector-cmp-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c: Move to... * gcc.target/i386/avx10_2-512-bf16-vector-smaxmin-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-512-vcmppbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vcmpbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vmaxpbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vmaxbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vminpbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vminbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-bf-vector-cmpp-1.c: Move to... * gcc.target/i386/avx10_2-bf16-vector-cmp-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c: Move to... * gcc.target/i386/avx10_2-bf16-vector-smaxmin-1.c: ...here. Adjust asm check. * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: Move to... * gcc.target/i386/avx10_2-partial-bf16-vector-smaxmin-1.c: ...here. * gcc.target/i386/avx10_2-vcmppbf16-2.c: Move to... * gcc.target/i386/avx10_2-vcmpbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vmaxpbf16-2.c: Move to... * gcc.target/i386/avx10_2-vmaxbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vminpbf16-2.c: Move to... * gcc.target/i386/avx10_2-vminbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/part-vect-vec_cmpbf.c: Adjust asm check. * gcc.target/i386/avx-1.c: Adjust builtin call. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. --- gcc/config/i386/avx10_2-512bf16intrin.h | 36 ++++++------ gcc/config/i386/avx10_2bf16intrin.h | 68 +++++++++++----------- gcc/config/i386/i386-builtin.def | 30 +++++----- gcc/config/i386/sse.md | 8 +-- gcc/testsuite/gcc.target/i386/avx-1.c | 6 +- .../gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c | 19 ------ .../i386/avx10_2-512-bf-vector-smaxmin-1.c | 20 ------- gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c | 16 ++--- .../i386/avx10_2-512-bf16-vector-cmp-1.c | 19 ++++++ .../i386/avx10_2-512-bf16-vector-smaxmin-1.c | 20 +++++++ .../gcc.target/i386/avx10_2-512-vcmpbf16-2.c | 37 ++++++++++++ .../gcc.target/i386/avx10_2-512-vcmppbf16-2.c | 37 ------------ .../gcc.target/i386/avx10_2-512-vmaxbf16-2.c | 52 +++++++++++++++++ .../gcc.target/i386/avx10_2-512-vmaxpbf16-2.c | 52 ----------------- .../gcc.target/i386/avx10_2-512-vminbf16-2.c | 52 +++++++++++++++++ .../gcc.target/i386/avx10_2-512-vminpbf16-2.c | 52 ----------------- .../gcc.target/i386/avx10_2-bf-vector-cmpp-1.c | 29 --------- .../gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c | 36 ------------ gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c | 32 +++++----- .../gcc.target/i386/avx10_2-bf16-vector-cmp-1.c | 29 +++++++++ .../i386/avx10_2-bf16-vector-smaxmin-1.c | 36 ++++++++++++ .../i386/avx10_2-partial-bf-vector-smaxmin-1.c | 36 ------------ .../i386/avx10_2-partial-bf16-vector-smaxmin-1.c | 36 ++++++++++++ gcc/testsuite/gcc.target/i386/avx10_2-vcmpbf16-2.c | 16 +++++ .../gcc.target/i386/avx10_2-vcmppbf16-2.c | 16 ----- gcc/testsuite/gcc.target/i386/avx10_2-vmaxbf16-2.c | 16 +++++ .../gcc.target/i386/avx10_2-vmaxpbf16-2.c | 16 ----- gcc/testsuite/gcc.target/i386/avx10_2-vminbf16-2.c | 16 +++++ .../gcc.target/i386/avx10_2-vminpbf16-2.c | 16 ----- .../gcc.target/i386/part-vect-vec_cmpbf.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 6 +- gcc/testsuite/gcc.target/i386/sse-23.c | 6 +- 32 files changed, 434 insertions(+), 434 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-cmp-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-smaxmin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcmpbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcmppbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-cmpp-1.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-cmp-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-smaxmin-1.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-smaxmin-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcmpbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcmppbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmaxbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c (limited to 'gcc') diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h index 4570c8f..fcd2853 100644 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ b/gcc/config/i386/avx10_2-512bf16intrin.h @@ -142,16 +142,16 @@ extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_max_pbh (__m512bh __A, __m512bh __B) { - return (__m512bh) __builtin_ia32_maxpbf16512 (__A, __B); + return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) + __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_maxpbf16512_mask (__A, __B, __W, __U); + __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); } extern __inline__ __m512bh @@ -159,25 +159,25 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_maxpbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_maxbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_min_pbh (__m512bh __A, __m512bh __B) { - return (__m512bh) __builtin_ia32_minpbf16512 (__A, __B); + return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) + __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_minpbf16512_mask (__A, __B, __W, __U); + __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); } extern __inline__ __m512bh @@ -185,9 +185,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_minpbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_minbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); } extern __inline__ __m512bh @@ -644,7 +644,7 @@ _mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) #endif /* __OPIMTIZE__ */ -/* Intrinsics vcmppbf16. */ +/* Intrinsics vcmpbf16. */ #ifdef __OPTIMIZE__ extern __inline __mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -652,7 +652,7 @@ _mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, const int __imm) { return (__mmask32) - __builtin_ia32_cmppbf16512_mask (__A, __B, __imm, __U); + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); } extern __inline __mmask32 @@ -660,16 +660,16 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) { return (__mmask32) - __builtin_ia32_cmppbf16512_mask (__A, __B, __imm, - (__mmask32) -1); + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, + (__mmask32) -1); } #else #define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask32) __builtin_ia32_cmppbf16512_mask ((B), (C), (D), (A))) + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) #define _mm512_cmp_pbh_mask(A, B, C) \ - ((__mmask32) __builtin_ia32_cmppbf16512_mask ((A), (B), (C), (-1))) + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) #endif /* __OPIMTIZE__ */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index 6c65a3c..945556d 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -252,10 +252,10 @@ _mm256_max_pbh (__m256bh __A, __m256bh __B) extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_max_pbh (__m256bh __W, __mmask16 __U, - __m256bh __A, __m256bh __B) + __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_maxpbf16256_mask (__A, __B, __W, __U); + __builtin_ia32_maxbf16256_mask (__A, __B, __W, __U); } extern __inline__ __m256bh @@ -263,25 +263,25 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_max_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_maxpbf16256_mask (__A, __B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_maxbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_pbh (__m128bh __A, __m128bh __B) { - return (__m128bh) __builtin_ia32_maxpbf16128 (__A, __B); + return (__m128bh) __builtin_ia32_maxbf16128 (__A, __B); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_max_pbh (__m128bh __W, __mmask8 __U, - __m128bh __A, __m128bh __B) + __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_maxpbf16128_mask (__A, __B, __W, __U); + __builtin_ia32_maxbf16128_mask (__A, __B, __W, __U); } extern __inline__ __m128bh @@ -289,16 +289,16 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_maxpbf16128_mask (__A, __B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_maxbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_pbh (__m256bh __A, __m256bh __B) { - return (__m256bh) __builtin_ia32_minpbf16256 (__A, __B); + return (__m256bh) __builtin_ia32_minbf16256 (__A, __B); } extern __inline__ __m256bh @@ -307,7 +307,7 @@ _mm256_mask_min_pbh (__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_minpbf16256_mask (__A, __B, __W, __U); + __builtin_ia32_minbf16256_mask (__A, __B, __W, __U); } extern __inline__ __m256bh @@ -315,16 +315,16 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_min_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_minpbf16256_mask (__A, __B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_minbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_pbh (__m128bh __A, __m128bh __B) { - return (__m128bh) __builtin_ia32_minpbf16128 (__A, __B); + return (__m128bh) __builtin_ia32_minbf16128 (__A, __B); } extern __inline__ __m128bh @@ -333,7 +333,7 @@ _mm_mask_min_pbh (__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_minpbf16128_mask (__A, __B, __W, __U); + __builtin_ia32_minbf16128_mask (__A, __B, __W, __U); } extern __inline__ __m128bh @@ -341,9 +341,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_minpbf16128_mask (__A, __B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_minbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); } extern __inline__ __m256bh @@ -1231,15 +1231,15 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) #endif /* __OPIMTIZE__ */ -/* Intrinsics vcmppbf16. */ +/* Intrinsics vcmpbf16. */ #ifdef __OPTIMIZE__ extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A, - __m256bh __B, const int __imm) + __m256bh __B, const int __imm) { return (__mmask16) - __builtin_ia32_cmppbf16256_mask (__A, __B, __imm, __U); + __builtin_ia32_cmpbf16256_mask (__A, __B, __imm, __U); } extern __inline __mmask16 @@ -1247,16 +1247,16 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_pbh_mask (__m256bh __A, __m256bh __B, const int __imm) { return (__mmask16) - __builtin_ia32_cmppbf16256_mask (__A, __B, __imm, (__mmask16) -1); + __builtin_ia32_cmpbf16256_mask (__A, __B, __imm, (__mmask16) -1); } extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cmp_pbh_mask (__mmask8 __U, __m128bh __A, - __m128bh __B, const int __imm) + __m128bh __B, const int __imm) { return (__mmask8) - __builtin_ia32_cmppbf16128_mask (__A, __B, __imm, __U); + __builtin_ia32_cmpbf16128_mask (__A, __B, __imm, __U); } extern __inline __mmask8 @@ -1264,23 +1264,23 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm) { return (__mmask8) - __builtin_ia32_cmppbf16128_mask (__A, __B, __imm, (__mmask8) -1); + __builtin_ia32_cmpbf16128_mask (__A, __B, __imm, (__mmask8) -1); } #else #define _mm256_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask16) __builtin_ia32_cmppbf16256_mask ((B), (C), (D), (A))) + ((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A))) #define _mm256_cmp_pbh_mask(A, B, C) \ - ((__mmask16) __builtin_ia32_cmppbf16256_mask ((A), (B), (C), \ - (__mmask16) (-1))) + ((__mmask16) __builtin_ia32_cmpbf16256_mask ((A), (B), (C), \ + (__mmask16) (-1))) #define _mm_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask8) __builtin_ia32_cmppbf16128_mask ((B), (C), (D), (A))) + ((__mmask8) __builtin_ia32_cmpbf16128_mask ((B), (C), (D), (A))) #define _mm_cmp_pbh_mask(A, B, C) \ - ((__mmask8) __builtin_ia32_cmppbf16128_mask ((A), (B), (C), \ - (__mmask8) (-1))) + ((__mmask8) __builtin_ia32_cmpbf16128_mask ((A), (B), (C), \ + (__mmask8) (-1))) #endif /* __OPIMTIZE__ */ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 5a643ca..9a8a466 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3203,18 +3203,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v16bf, "__built BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v16bf_mask, "__builtin_ia32_divbf16256_mask", IX86_BUILTIN_DIVBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v8bf, "__builtin_ia32_divbf16128", IX86_BUILTIN_DIVBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_divbf16_v8bf_mask, "__builtin_ia32_divbf16128_mask", IX86_BUILTIN_DIVBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf, "__builtin_ia32_maxpbf16512", IX86_BUILTIN_MAXPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxpbf16_v32bf_mask, "__builtin_ia32_maxpbf16512_mask", IX86_BUILTIN_MAXPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf, "__builtin_ia32_maxpbf16256", IX86_BUILTIN_MAXPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v16bf_mask, "__builtin_ia32_maxpbf16256_mask", IX86_BUILTIN_MAXPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf, "__builtin_ia32_maxpbf16128", IX86_BUILTIN_MAXPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxpbf16_v8bf_mask, "__builtin_ia32_maxpbf16128_mask", IX86_BUILTIN_MAXPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf, "__builtin_ia32_minpbf16512", IX86_BUILTIN_MINPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminpbf16_v32bf_mask, "__builtin_ia32_minpbf16512_mask", IX86_BUILTIN_MINPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf, "__builtin_ia32_minpbf16256", IX86_BUILTIN_MINPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v16bf_mask, "__builtin_ia32_minpbf16256_mask", IX86_BUILTIN_MINPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf, "__builtin_ia32_minpbf16128", IX86_BUILTIN_MINPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminpbf16_v8bf_mask, "__builtin_ia32_minpbf16128_mask", IX86_BUILTIN_MINPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxbf16_v32bf, "__builtin_ia32_maxbf16512", IX86_BUILTIN_MAXBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_smaxbf16_v32bf_mask, "__builtin_ia32_maxbf16512_mask", IX86_BUILTIN_MAXBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v16bf, "__builtin_ia32_maxbf16256", IX86_BUILTIN_MAXBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v16bf_mask, "__builtin_ia32_maxbf16256_mask", IX86_BUILTIN_MAXBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v8bf, "__builtin_ia32_maxbf16128", IX86_BUILTIN_MAXBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_smaxbf16_v8bf_mask, "__builtin_ia32_maxbf16128_mask", IX86_BUILTIN_MAXBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminbf16_v32bf, "__builtin_ia32_minbf16512", IX86_BUILTIN_MINBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sminbf16_v32bf_mask, "__builtin_ia32_minbf16512_mask", IX86_BUILTIN_MINBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf, "__builtin_ia32_minbf16256", IX86_BUILTIN_MINBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf_mask, "__builtin_ia32_minbf16256_mask", IX86_BUILTIN_MINBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf, "__builtin_ia32_minbf16128", IX86_BUILTIN_MINBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf_mask, "__builtin_ia32_minbf16128_mask", IX86_BUILTIN_MINBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf, "__builtin_ia32_scalefpbf16512", IX86_BUILTIN_SCALEFPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf_mask, "__builtin_ia32_scalefpbf16512_mask", IX86_BUILTIN_SCALEFPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__builtin_ia32_scalefpbf16256", IX86_BUILTIN_SCALEFPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) @@ -3281,9 +3281,9 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantpbf16_v8bf_mask, BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fpclasspbf16_v32bf_mask, "__builtin_ia32_fpclasspbf16512_mask", IX86_BUILTIN_FPCLASSPBF16512_MASK, UNKNOWN, (int) SI_FTYPE_V32BF_INT_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v16bf_mask, "__builtin_ia32_fpclasspbf16256_mask", IX86_BUILTIN_FPCLASSPBF16256_MASK, UNKNOWN, (int) HI_FTYPE_V16BF_INT_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v8bf_mask, "__builtin_ia32_fpclasspbf16128_mask", IX86_BUILTIN_FPCLASSPBF16128_MASK, UNKNOWN, (int) QI_FTYPE_V8BF_INT_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cmppbf16_v32bf_mask, "__builtin_ia32_cmppbf16512_mask", IX86_BUILTIN_CMPPBF16512_MASK, UNKNOWN, (int) USI_FTYPE_V32BF_V32BF_INT_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmppbf16_v16bf_mask, "__builtin_ia32_cmppbf16256_mask", IX86_BUILTIN_CMPPBF16256_MASK, UNKNOWN, (int) UHI_FTYPE_V16BF_V16BF_INT_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmppbf16_v8bf_mask, "__builtin_ia32_cmppbf16128_mask", IX86_BUILTIN_CMPPBF16128_MASK, UNKNOWN, (int) UQI_FTYPE_V8BF_V8BF_INT_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cmpbf16_v32bf_mask, "__builtin_ia32_cmpbf16512_mask", IX86_BUILTIN_CMPBF16512_MASK, UNKNOWN, (int) USI_FTYPE_V32BF_V32BF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v16bf_mask, "__builtin_ia32_cmpbf16256_mask", IX86_BUILTIN_CMPBF16256_MASK, UNKNOWN, (int) UHI_FTYPE_V16BF_V16BF_INT_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v8bf_mask, "__builtin_ia32_cmpbf16128_mask", IX86_BUILTIN_CMPBF16128_MASK, UNKNOWN, (int) UQI_FTYPE_V8BF_V8BF_INT_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16eq", IX86_BUILTIN_VCOMSBF16EQ, EQ, (int) INT_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16gt", IX86_BUILTIN_VCOMSBF16GT, GT, (int) INT_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16ge", IX86_BUILTIN_VCOMSBF16GE, GE, (int) INT_FTYPE_V8BF_V8BF) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c3cb932..d1156f1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -32092,13 +32092,13 @@ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))] "TARGET_AVX10_2_256") -(define_insn "avx10_2_pbf16_" +(define_insn "avx10_2_bf16_" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (smaxmin:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "register_operand" "v") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2_256" - "vpbf16\t{%2, %1, %0|%0, %1, %2}" + "vbf16\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -32436,7 +32436,7 @@ "vfpclasspbf16\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex")]) -(define_insn "avx10_2_cmppbf16_" +(define_insn "avx10_2_cmpbf16_" [(set (match_operand: 0 "register_operand" "=k") (unspec: [(match_operand:VBF_AVX10_2 1 "register_operand" "v") @@ -32444,7 +32444,7 @@ (match_operand 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP))] "TARGET_AVX10_2_256" - "vcmppbf16\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vcmpbf16\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_comsbf16_v8bf" diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index d013dc7..4e85c32 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1021,7 +1021,7 @@ #define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D) #define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C) -#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) /* avx10_2bf16intrin.h */ #define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D) @@ -1032,8 +1032,8 @@ #define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C) #define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C) -#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) -#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) /* avx10_2-512satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c deleted file mode 100644 index 190b76d..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-cmpp-1.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -O2 -mprefer-vector-width=512" } */ -/* { dg-final { scan-assembler-times "vcmppbf16" 5 } } */ - -typedef __bf16 v32bf __attribute__ ((__vector_size__ (64))); - -#define VCMPMN(type, op, name) \ -type \ -__attribute__ ((noinline, noclone)) \ -vec_cmp_##type##type##name (type a, type b) \ -{ \ - return a op b; \ -} - -VCMPMN (v32bf, <, lt) -VCMPMN (v32bf, <=, le) -VCMPMN (v32bf, >, gt) -VCMPMN (v32bf, >=, ge) -VCMPMN (v32bf, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c deleted file mode 100644 index d90c969..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c +++ /dev/null @@ -1,20 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -mprefer-vector-width=512 -Ofast" } */ -/* /* { dg-final { scan-assembler-times "vmaxpbf16" 1 } } */ -/* /* { dg-final { scan-assembler-times "vminpbf16" 1 } } */ - -void -maxpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 32; i++) - dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; -} - -void -minpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 32; i++) - dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c index 488ccc9..dd4d81e 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c @@ -12,12 +12,12 @@ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -60,8 +60,8 @@ /* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-cmp-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-cmp-1.c new file mode 100644 index 0000000..a5e1d43 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-cmp-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -O2 -mprefer-vector-width=512" } */ +/* { dg-final { scan-assembler-times "vcmpbf16" 5 } } */ + +typedef __bf16 v32bf __attribute__ ((__vector_size__ (64))); + +#define VCMPMN(type, op, name) \ +type \ +__attribute__ ((noinline, noclone)) \ +vec_cmp_##type##type##name (type a, type b) \ +{ \ + return a op b; \ +} + +VCMPMN (v32bf, <, lt) +VCMPMN (v32bf, <=, le) +VCMPMN (v32bf, >, gt) +VCMPMN (v32bf, >=, ge) +VCMPMN (v32bf, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-smaxmin-1.c new file mode 100644 index 0000000..0282de5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-vector-smaxmin-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2-512 -mprefer-vector-width=512 -Ofast" } */ +/* { dg-final { scan-assembler-times "vmaxbf16" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16" 1 } } */ + +void +maxbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 32; i++) + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; +} + +void +minbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 32; i++) + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmpbf16-2.c new file mode 100644 index 0000000..a6f8f54 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmpbf16-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + __mmask32 res1, res2, exp = 0; + UNION_TYPE (AVX512F_LEN, bf16_uw) src1, src2; + MASK_TYPE mask = MASK_VALUE; + + for (i = 0; i < SIZE; i++) + { + float x = 0.5; + float y = 0.25; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + if (src1.a[i] == src2.a[i]) + exp |= 1 << i; + } + + res1 = INTRINSIC (_cmp_pbh_mask) (src1.x, src2.x, 0); + res2 = INTRINSIC (_mask_cmp_pbh_mask) (mask, src1.x, src2.x, 0); + + if (exp != res1 || exp != res2) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmppbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmppbf16-2.c deleted file mode 100644 index a6f8f54..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcmppbf16-2.c +++ /dev/null @@ -1,37 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - __mmask32 res1, res2, exp = 0; - UNION_TYPE (AVX512F_LEN, bf16_uw) src1, src2; - MASK_TYPE mask = MASK_VALUE; - - for (i = 0; i < SIZE; i++) - { - float x = 0.5; - float y = 0.25; - src2.a[i] = convert_fp32_to_bf16 (y); - src1.a[i] = convert_fp32_to_bf16 (x); - if (src1.a[i] == src2.a[i]) - exp |= 1 << i; - } - - res1 = INTRINSIC (_cmp_pbh_mask) (src1.x, src2.x, 0); - res2 = INTRINSIC (_mask_cmp_pbh_mask) (mask, src1.x, src2.x, 0); - - if (exp != res1 || exp != res2) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxbf16-2.c new file mode 100644 index 0000000..2485e80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxbf16-2.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = 0.5; + float y = 0.25; + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + if (x > y) + res_ref[i] = res_ref2[i] = src1.a[i]; + else + res_ref[i] = res_ref2[i] = src2.a[i]; + } + + res1.x = INTRINSIC (_max_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_max_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_max_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c deleted file mode 100644 index 2485e80..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vmaxpbf16-2.c +++ /dev/null @@ -1,52 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float x = 0.5; - float y = 0.25; - float res; - src2.a[i] = convert_fp32_to_bf16 (y); - src1.a[i] = convert_fp32_to_bf16 (x); - if (x > y) - res_ref[i] = res_ref2[i] = src1.a[i]; - else - res_ref[i] = res_ref2[i] = src2.a[i]; - } - - res1.x = INTRINSIC (_max_pbh) (src1.x, src2.x); - res2.x = INTRINSIC (_mask_max_pbh) (res2.x, mask, src1.x, src2.x); - res3.x = INTRINSIC (_maskz_max_pbh) (mask, src1.x, src2.x); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vminbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminbf16-2.c new file mode 100644 index 0000000..7591edf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminbf16-2.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = 0.5; + float y = 0.25; + float res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + if (x < y) + res_ref[i] = res_ref2[i] = src1.a[i]; + else + res_ref[i] = res_ref2[i] = src2.a[i]; + } + + res1.x = INTRINSIC (_min_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_min_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_min_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c deleted file mode 100644 index 7591edf..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vminpbf16-2.c +++ /dev/null @@ -1,52 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float x = 0.5; - float y = 0.25; - float res; - src2.a[i] = convert_fp32_to_bf16 (y); - src1.a[i] = convert_fp32_to_bf16 (x); - if (x < y) - res_ref[i] = res_ref2[i] = src1.a[i]; - else - res_ref[i] = res_ref2[i] = src2.a[i]; - } - - res1.x = INTRINSIC (_min_pbh) (src1.x, src2.x); - res2.x = INTRINSIC (_mask_min_pbh) (res2.x, mask, src1.x, src2.x); - res3.x = INTRINSIC (_maskz_min_pbh) (mask, src1.x, src2.x); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-cmpp-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-cmpp-1.c deleted file mode 100644 index a28fe95..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-cmpp-1.c +++ /dev/null @@ -1,29 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ -/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */ - -typedef __bf16 v16bf __attribute__ ((__vector_size__ (32))); -typedef __bf16 v8bf __attribute__ ((__vector_size__ (16))); - -#define VCMPMN(type, op, name) \ -type \ -__attribute__ ((noinline, noclone)) \ -vec_cmp_##type##type##name (type a, type b) \ -{ \ - return a op b; \ -} - -VCMPMN (v16bf, <, lt) -VCMPMN (v8bf, <, lt) - -VCMPMN (v16bf, <=, le) -VCMPMN (v8bf, <=, le) - -VCMPMN (v16bf, >, gt) -VCMPMN (v8bf, >, gt) - -VCMPMN (v16bf, >=, ge) -VCMPMN (v8bf, >=, ge) - -VCMPMN (v16bf, ==, eq) -VCMPMN (v8bf, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c deleted file mode 100644 index a772d4e..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c +++ /dev/null @@ -1,36 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */ -/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */ -/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */ - -void -maxpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 16; i++) - dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; -} - -void -minpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 16; i++) - dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; -} - -void -maxpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 16; i++) - dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; -} - -void -minpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2) -{ - int i; - for (i = 0; i < 16; i++) - dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c index d4e5400..a4841e5 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c @@ -24,18 +24,18 @@ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vdivbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vmaxpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vminpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vmaxbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -120,10 +120,10 @@ /* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$1\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcmppbf16\[ \\t\]+\\\$2\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$2\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%k\[0-9\]\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-cmp-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-cmp-1.c new file mode 100644 index 0000000..79bddb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-cmp-1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vcmpbf16" 10 } } */ + +typedef __bf16 v16bf __attribute__ ((__vector_size__ (32))); +typedef __bf16 v8bf __attribute__ ((__vector_size__ (16))); + +#define VCMPMN(type, op, name) \ +type \ +__attribute__ ((noinline, noclone)) \ +vec_cmp_##type##type##name (type a, type b) \ +{ \ + return a op b; \ +} + +VCMPMN (v16bf, <, lt) +VCMPMN (v8bf, <, lt) + +VCMPMN (v16bf, <=, le) +VCMPMN (v8bf, <=, le) + +VCMPMN (v16bf, >, gt) +VCMPMN (v8bf, >, gt) + +VCMPMN (v16bf, >=, ge) +VCMPMN (v8bf, >=, ge) + +VCMPMN (v16bf, ==, eq) +VCMPMN (v8bf, ==, eq) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-smaxmin-1.c new file mode 100644 index 0000000..703ea64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-vector-smaxmin-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */ +/* { dg-final { scan-assembler-times "vmaxbf16" 2 } } */ +/* { dg-final { scan-assembler-times "vminbf16" 2 } } */ + +void +maxbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 16; i++) + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; +} + +void +minbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 16; i++) + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; +} + +void +maxbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 16; i++) + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; +} + +void +minbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2) +{ + int i; + for (i = 0; i < 16; i++) + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c deleted file mode 100644 index c517850..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c +++ /dev/null @@ -1,36 +0,0 @@ -/* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */ -/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */ -/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */ - -void -maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) -{ - int i; - for (i = 0; i < 4; i++) - dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; -} - -void -maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) -{ - int i; - for (i = 0; i < 2; i++) - dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; -} - -void -minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) -{ - int i; - for (i = 0; i < 4; i++) - dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; -} - -void -minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) -{ - int i; - for (i = 0; i < 2; i++) - dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-smaxmin-1.c new file mode 100644 index 0000000..59a0fc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf16-vector-smaxmin-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -Ofast" } */ +/* { dg-final { scan-assembler-times "vmaxbf16" 2 } } */ +/* { dg-final { scan-assembler-times "vminbf16" 2 } } */ + +void +maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) +{ + int i; + for (i = 0; i < 4; i++) + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; +} + +void +maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) +{ + int i; + for (i = 0; i < 2; i++) + dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; +} + +void +minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) +{ + int i; + for (i = 0; i < 4; i++) + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; +} + +void +minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2) +{ + int i; + for (i = 0; i < 2; i++) + dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcmpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcmpbf16-2.c new file mode 100644 index 0000000..cb6506a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcmpbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcmpbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcmpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcmppbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcmppbf16-2.c deleted file mode 100644 index 949bdc9..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcmppbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcmppbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcmppbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmaxbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxbf16-2.c new file mode 100644 index 0000000..950870f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmaxbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vmaxbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c deleted file mode 100644 index bfc3dd8..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vmaxpbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vmaxpbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vmaxpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminbf16-2.c new file mode 100644 index 0000000..9786127 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vminbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vminbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c deleted file mode 100644 index bd02ce1..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vminpbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vminpbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vminpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c index 0bb720b..c904dc0 100644 --- a/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c +++ b/gcc/testsuite/gcc.target/i386/part-vect-vec_cmpbf.c @@ -1,6 +1,6 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -mavx10.2" } */ -/* { dg-final { scan-assembler-times "vcmppbf16" 10 } } */ +/* { dg-final { scan-assembler-times "vcmpbf16" 10 } } */ typedef __bf16 __attribute__((__vector_size__ (4))) v2bf; typedef __bf16 __attribute__((__vector_size__ (8))) v4bf; diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 04df397..6affd47 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1029,7 +1029,7 @@ #define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D) #define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C) -#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) /* avx10_2bf16intrin.h */ #define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D) @@ -1040,8 +1040,8 @@ #define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C) #define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C) -#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) -#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) /* avx10_2-512satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 0f836d9..c846b08 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1003,7 +1003,7 @@ #define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D) #define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C) -#define __builtin_ia32_cmppbf16512_mask(A, B, C, D) __builtin_ia32_cmppbf16512_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) /* avx10_2bf16intrin.h */ #define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D) @@ -1014,8 +1014,8 @@ #define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C) #define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C) -#define __builtin_ia32_cmppbf16256_mask(A, B, C, D) __builtin_ia32_cmppbf16256_mask(A, B, 1, D) -#define __builtin_ia32_cmppbf16128_mask(A, B, C, D) __builtin_ia32_cmppbf16128_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) +#define __builtin_ia32_cmpbf16128_mask(A, B, C, D) __builtin_ia32_cmpbf16128_mask(A, B, 1, D) /* avx10_2-512satcvtintrin.h */ #define __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs512_mask_round(A, B, C, 8) -- cgit v1.1