From 71a27375d09ec6b4dee3938b6d1ed6762ecdcfea Mon Sep 17 00:00:00 2001
From: Haochen Jiang
Date: Thu, 23 Jan 2025 09:52:01 +0800
Subject: i386: Change mnemonics from V[GETMANT,REDUCENE,RNDSCALENE]PBF16 to
 V[GETMANT,REDUCE,RNDSCALE]BF16

gcc/ChangeLog:

	PR target/118270
	* config/i386/avx10_2-512bf16intrin.h: Change intrin and builtin
	name according to new mnemonics.
	* config/i386/avx10_2bf16intrin.h: Ditto.
	* config/i386/i386-builtin.def (BDESC): Ditto.
	* config/i386/sse.md (UNSPEC_VRNDSCALEBF16): Rename from
	UNSPEC_VRNDSCALENEPBF16.
	(UNSPEC_VREDUCEBF16): Rename from UNSPEC_VREDUCENEPBF16.
	(UNSPEC_VGETMANTBF16): Rename from UNSPEC_VGETMANTPBF16.
	(BF16IMMOP): Adjust iterator due to UNSPEC name change.
	(bf16immop): Ditto.
	(avx10_2_pbf16_): Rename to...
	(avx10_2_bf16_): ...this. Change instruction name output.

gcc/testsuite/ChangeLog:

	PR target/118270
	* gcc.target/i386/avx10_2-512-bf16-1.c: Adjust output and intrin
	call.
	* gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-512-vgetmantbf16-2.c: ...here. Adjust
	intrin call.
	* gcc.target/i386/avx10_2-512-vreducenepbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-512-vreducebf16-2.c: ...here. Adjust
	intrin call.
	* gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-512-vrndscalebf16-2.c: ...here. Adjust
	intrin call.
	* gcc.target/i386/avx10_2-bf16-1.c: Adjust output and intrin call.
	* gcc.target/i386/avx10_2-vgetmantpbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-vgetmantbf16-2.c: ...here. Adjust intrin
	call.
	* gcc.target/i386/avx10_2-vreducenepbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-vreducebf16-2.c: ...here. Adjust intrin
	call.
	* gcc.target/i386/avx10_2-vrndscalenepbf16-2.c: Move to...
	* gcc.target/i386/avx10_2-vrndscalebf16-2.c: ...here. Adjust intrin
	call.
	* gcc.target/i386/avx-1.c: Adjust builtin call.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Adjust intrin call.
	* gcc.target/i386/sse-22.c: Ditto.
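For reference only (not part of the patch below): a minimal sketch of how user
code spelled against these intrinsics reads before and after the rename, taken
from the new _mm512_roundscale_pbh / _mm512_mask_reduce_pbh signatures this
patch introduces.  The function name, variables, and the 0x10 immediate are
illustrative, and it assumes a compiler with this patch applied, built for an
AVX10.2-512 target (e.g. -march=x86-64-v3 -mavx10.2-512).

#include <immintrin.h>

__m512bh
scale_and_reduce (__m512bh x, __mmask32 m)
{
  /* Pre-patch spelling (old mnemonics), shown for comparison:
       __m512bh t = _mm512_roundscalene_pbh (x, 0x10);
       return _mm512_mask_reducene_pbh (t, m, x, 0x10);  */

  /* New spelling after this patch: the "ne" (no-exception) infix is gone,
     arguments and semantics are unchanged.  */
  __m512bh t = _mm512_roundscale_pbh (x, 0x10);
  return _mm512_mask_reduce_pbh (t, m, x, 0x10);
}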
--- gcc/config/i386/avx10_2-512bf16intrin.h | 112 +++++----- gcc/config/i386/avx10_2bf16intrin.h | 232 ++++++++++----------- gcc/config/i386/i386-builtin.def | 18 +- gcc/config/i386/sse.md | 22 +- gcc/testsuite/gcc.target/i386/avx-1.c | 18 +- gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c | 30 +-- .../gcc.target/i386/avx10_2-512-vgetmantbf16-2.c | 51 +++++ .../gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c | 51 ----- .../gcc.target/i386/avx10_2-512-vreducebf16-2.c | 51 +++++ .../gcc.target/i386/avx10_2-512-vreducenepbf16-2.c | 51 ----- .../gcc.target/i386/avx10_2-512-vrndscalebf16-2.c | 47 +++++ .../i386/avx10_2-512-vrndscalenepbf16-2.c | 47 ----- gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c | 60 +++--- .../gcc.target/i386/avx10_2-vgetmantbf16-2.c | 16 ++ .../gcc.target/i386/avx10_2-vgetmantpbf16-2.c | 16 -- .../gcc.target/i386/avx10_2-vreducebf16-2.c | 16 ++ .../gcc.target/i386/avx10_2-vreducenepbf16-2.c | 16 -- .../gcc.target/i386/avx10_2-vrndscalebf16-2.c | 16 ++ .../gcc.target/i386/avx10_2-vrndscalenepbf16-2.c | 16 -- gcc/testsuite/gcc.target/i386/sse-13.c | 18 +- gcc/testsuite/gcc.target/i386/sse-14.c | 36 ++-- gcc/testsuite/gcc.target/i386/sse-22.c | 36 ++-- gcc/testsuite/gcc.target/i386/sse-23.c | 18 +- 23 files changed, 497 insertions(+), 497 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vreducebf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vreducenepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalebf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vgetmantbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vgetmantpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vreducebf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vreducenepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vrndscalebf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vrndscalenepbf16-2.c (limited to 'gcc') diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h index fcd2853..276a438 100644 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ b/gcc/config/i386/avx10_2-512bf16intrin.h @@ -468,100 +468,100 @@ _mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) __U); } -/* Intrinsics vrndscalepbf16. */ +/* Intrinsics vrndscalebf16. 
*/ #ifdef __OPTIMIZE__ extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_roundscalene_pbh (__m512bh __A, int B) +_mm512_roundscale_pbh (__m512bh __A, int B) { return (__m512bh) - __builtin_ia32_rndscalenepbf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_roundscalene_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) +_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) { return (__m512bh) - __builtin_ia32_rndscalenepbf16512_mask (__A, B, __W, __U); + __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_roundscalene_pbh (__mmask32 __U, __m512bh __A, int B) +_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) { return (__m512bh) - __builtin_ia32_rndscalenepbf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); } #else -#define _mm512_roundscalene_pbh(A, B) \ - (__builtin_ia32_rndscalenepbf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) +#define _mm512_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) -#define _mm512_mask_roundscalene_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalenepbf16512_mask ((C), (D), (A), (B))) +#define _mm512_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) -#define _mm512_maskz_roundscalene_pbh(A, B, C) \ - (__builtin_ia32_rndscalenepbf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) +#define _mm512_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) #endif /* __OPTIMIZE__ */ -/* Intrinsics vreducepbf16. */ +/* Intrinsics vreducebf16. 
*/ #ifdef __OPTIMIZE__ extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_reducene_pbh (__m512bh __A, int B) +_mm512_reduce_pbh (__m512bh __A, int B) { return (__m512bh) - __builtin_ia32_reducenepbf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_reducene_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, int B) +_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, int B) { return (__m512bh) - __builtin_ia32_reducenepbf16512_mask (__A, B, __W, __U); + __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_reducene_pbh (__mmask32 __U, __m512bh __A, int B) +_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) { return (__m512bh) - __builtin_ia32_reducenepbf16512_mask (__A, B, + __builtin_ia32_reducebf16512_mask (__A, B, (__v32bf) _mm512_setzero_si512 (), __U); } #else -#define _mm512_reducene_pbh(A, B) \ - (__builtin_ia32_reducenepbf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) +#define _mm512_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) -#define _mm512_mask_reducene_pbh(A, B, C, D) \ - (__builtin_ia32_reducenepbf16512_mask ((C), (D), (A), (B))) +#define _mm512_mask_reduce_pbh(A, B, C, D) \ + (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) -#define _mm512_maskz_reducene_pbh(A, B, C) \ - (__builtin_ia32_reducenepbf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) +#define _mm512_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) #endif /* __OPTIMIZE__ */ -/* Intrinsics vgetmantpbf16. */ +/* Intrinsics vgetmantbf16. 
*/ #ifdef __OPTIMIZE__ extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -569,9 +569,9 @@ _mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { return (__m512bh) - __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); } extern __inline__ __m512bh @@ -581,8 +581,8 @@ _mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m512bh) - __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B, - __W, __U); + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + __W, __U); } extern __inline__ __m512bh @@ -592,23 +592,23 @@ _mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m512bh) - __builtin_ia32_getmantpbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + __U); } #else #define _mm512_getmant_pbh(A, B, C) \ - (__builtin_ia32_getmantpbf16512_mask ((A), (int)(((C)<<2) | (B)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) + (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) #define _mm512_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantpbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) #define _mm512_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantpbf16512_mask ((B), (int)(((C)<<2) | (D)), \ - (__v32bf) _mm512_setzero_si512 (), \ + (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ + (__v32bf) _mm512_setzero_si512 (), \ (A))) #endif /* __OPTIMIZE__ */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index 945556d..891df89 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -901,186 +901,186 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A) __U); } -/* Intrinsics vrndscalepbf16. */ +/* Intrinsics vrndscalebf16. 
*/ #ifdef __OPTIMIZE__ extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_roundscalene_pbh (__m256bh __A, int B) +_mm256_roundscale_pbh (__m256bh __A, int B) { return (__m256bh) - __builtin_ia32_rndscalenepbf16256_mask (__A, B, - (__v16bf) _mm256_setzero_si256 (), - (__mmask16) -1); + __builtin_ia32_rndscalebf16256_mask (__A, B, + (__v16bf) _mm256_setzero_si256 (), + (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_roundscalene_pbh (__m256bh __W, __mmask16 __U, - __m256bh __A, int B) +_mm256_mask_roundscale_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, int B) { return (__m256bh) - __builtin_ia32_rndscalenepbf16256_mask (__A, B, __W, __U); + __builtin_ia32_rndscalebf16256_mask (__A, B, __W, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_roundscalene_pbh (__mmask16 __U, __m256bh __A, int B) +_mm256_maskz_roundscale_pbh (__mmask16 __U, __m256bh __A, int B) { return (__m256bh) - __builtin_ia32_rndscalenepbf16256_mask (__A, B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_rndscalebf16256_mask (__A, B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_roundscalene_pbh (__m128bh __A, int B) +_mm_roundscale_pbh (__m128bh __A, int B) { return (__m128bh) - __builtin_ia32_rndscalenepbf16128_mask (__A, B, - (__v8bf) _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_rndscalebf16128_mask (__A, B, + (__v8bf) _mm_setzero_si128 (), + (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_roundscalene_pbh (__m128bh __W, __mmask8 __U, - __m128bh __A, int B) +_mm_mask_roundscale_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, int B) { return (__m128bh) - __builtin_ia32_rndscalenepbf16128_mask (__A, B, __W, __U); + __builtin_ia32_rndscalebf16128_mask (__A, B, __W, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_roundscalene_pbh (__mmask8 __U, __m128bh __A, int B) +_mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) { return (__m128bh) - __builtin_ia32_rndscalenepbf16128_mask (__A, B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_rndscalebf16128_mask (__A, B, + (__v8bf) _mm_setzero_si128 (), + __U); } #else -#define _mm256_roundscalene_pbh(A, B) \ - (__builtin_ia32_rndscalenepbf16256_mask ((A), (B), \ - (__v16bf) _mm256_setzero_si256 (), \ - (__mmask16) -1)) +#define _mm256_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16256_mask ((A), (B), \ + (__v16bf) _mm256_setzero_si256 (), \ + (__mmask16) -1)) -#define _mm256_mask_roundscalene_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalenepbf16256_mask ((C), (D), (A), (B))) +#define _mm256_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16256_mask ((C), (D), (A), (B))) -#define _mm256_maskz_roundscalene_pbh(A, B, C) \ - (__builtin_ia32_rndscalenepbf16256_mask ((B), (C), \ - (__v16bf) _mm256_setzero_si256 (), \ - (A))) +#define _mm256_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16256_mask ((B), (C), \ + (__v16bf) _mm256_setzero_si256 (), \ + (A))) -#define _mm_roundscalene_pbh(A, B) \ - (__builtin_ia32_rndscalenepbf16128_mask ((A), (B), \ - (__v8bf) _mm_setzero_si128 (), \ - (__mmask8) -1)) +#define _mm_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16128_mask 
((A), (B), \ + (__v8bf) _mm_setzero_si128 (), \ + (__mmask8) -1)) -#define _mm_mask_roundscalene_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalenepbf16128_mask ((C), (D), (A), (B))) +#define _mm_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16128_mask ((C), (D), (A), (B))) -#define _mm_maskz_roundscalene_pbh(A, B, C) \ - (__builtin_ia32_rndscalenepbf16128_mask ((B), (C), \ - (__v8bf) _mm_setzero_si128 (), \ - (A))) +#define _mm_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16128_mask ((B), (C), \ + (__v8bf) _mm_setzero_si128 (), \ + (A))) #endif /* __OPTIMIZE__ */ -/* Intrinsics vreducepbf16. */ +/* Intrinsics vreducebf16. */ #ifdef __OPTIMIZE__ extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_reducene_pbh (__m256bh __A, int B) +_mm256_reduce_pbh (__m256bh __A, int B) { return (__m256bh) - __builtin_ia32_reducenepbf16256_mask (__A, B, - (__v16bf) _mm256_setzero_si256 (), - (__mmask16) -1); + __builtin_ia32_reducebf16256_mask (__A, B, + (__v16bf) _mm256_setzero_si256 (), + (__mmask16) -1); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_reducene_pbh (__m256bh __W, __mmask16 __U, - __m256bh __A, int B) +_mm256_mask_reduce_pbh (__m256bh __W, __mmask16 __U, + __m256bh __A, int B) { return (__m256bh) - __builtin_ia32_reducenepbf16256_mask (__A, B, __W, __U); + __builtin_ia32_reducebf16256_mask (__A, B, __W, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_reducene_pbh (__mmask16 __U, __m256bh __A, int B) +_mm256_maskz_reduce_pbh (__mmask16 __U, __m256bh __A, int B) { return (__m256bh) - __builtin_ia32_reducenepbf16256_mask (__A, B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_reducebf16256_mask (__A, B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_reducene_pbh (__m128bh __A, int B) +_mm_reduce_pbh (__m128bh __A, int B) { return (__m128bh) - __builtin_ia32_reducenepbf16128_mask (__A, B, - (__v8bf) _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_reducebf16128_mask (__A, B, + (__v8bf) _mm_setzero_si128 (), + (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_reducene_pbh (__m128bh __W, __mmask8 __U, - __m128bh __A, int B) +_mm_mask_reduce_pbh (__m128bh __W, __mmask8 __U, + __m128bh __A, int B) { return (__m128bh) - __builtin_ia32_reducenepbf16128_mask (__A, B, __W, __U); + __builtin_ia32_reducebf16128_mask (__A, B, __W, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_reducene_pbh (__mmask8 __U, __m128bh __A, int B) +_mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) { return (__m128bh) - __builtin_ia32_reducenepbf16128_mask (__A, B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_reducebf16128_mask (__A, B, + (__v8bf) _mm_setzero_si128 (), + __U); } #else -#define _mm256_reducene_pbh(A, B) \ - (__builtin_ia32_reducenepbf16256_mask ((A), (B), \ - (__v16bf) _mm256_setzero_si256 (), \ - (__mmask16) -1)) +#define _mm256_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16256_mask ((A), (B), \ + (__v16bf) _mm256_setzero_si256 (), \ + (__mmask16) -1)) -#define _mm256_mask_reducene_pbh(A, B, C, D) \ - (__builtin_ia32_reducenepbf16256_mask ((C), (D), (A), (B))) +#define _mm256_mask_reduce_pbh(A, B, C, D) \ + 
(__builtin_ia32_reducebf16256_mask ((C), (D), (A), (B))) -#define _mm256_maskz_reducene_pbh(A, B, C) \ - (__builtin_ia32_reducenepbf16256_mask ((B), (C), \ - (__v16bf) _mm256_setzero_si256 (), \ - (A))) +#define _mm256_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16256_mask ((B), (C), \ + (__v16bf) _mm256_setzero_si256 (), \ + (A))) -#define _mm_reducene_pbh(A, B) \ - (__builtin_ia32_reducenepbf16128_mask ((A), (B), \ - (__v8bf) _mm_setzero_si128 (), \ - (__mmask8) -1)) +#define _mm_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16128_mask ((A), (B), \ + (__v8bf) _mm_setzero_si128 (), \ + (__mmask8) -1)) -#define _mm_mask_reducene_pbh(A, B, C, D) \ - (__builtin_ia32_reducenepbf16128_mask ((C), (D), (A), (B))) +#define _mm_mask_reduce_pbh(A, B, C, D) \ + (__builtin_ia32_reducebf16128_mask ((C), (D), (A), (B))) -#define _mm_maskz_reducene_pbh(A, B, C) \ - (__builtin_ia32_reducenepbf16128_mask ((B), (C), \ - (__v8bf) _mm_setzero_si128 (), \ - (A))) +#define _mm_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16128_mask ((B), (C), \ + (__v8bf) _mm_setzero_si128 (), \ + (A))) #endif /* __OPTIMIZE__ */ -/* Intrinsics vgetmantpbf16. */ +/* Intrinsics vgetmantbf16. */ #ifdef __OPTIMIZE__ extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -1088,9 +1088,9 @@ _mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { return (__m256bh) - __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B, - (__v16bf) _mm256_setzero_si256 (), - (__mmask16) -1); + __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B, + (__v16bf) _mm256_setzero_si256 (), + (__mmask16) -1); } extern __inline__ __m256bh @@ -1100,8 +1100,8 @@ _mm256_mask_getmant_pbh (__m256bh __W, __mmask16 __U, __m256bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m256bh) - __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B, - __W, __U); + __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B, + __W, __U); } extern __inline__ __m256bh @@ -1111,9 +1111,9 @@ _mm256_maskz_getmant_pbh (__mmask16 __U, __m256bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m256bh) - __builtin_ia32_getmantpbf16256_mask (__A, (int) (__C << 2) | __B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_getmantbf16256_mask (__A, (int) (__C << 2) | __B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh @@ -1122,9 +1122,9 @@ _mm_getmant_pbh (__m128bh __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { return (__m128bh) - __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B, - (__v8bf) _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B, + (__v8bf) _mm_setzero_si128 (), + (__mmask8) -1); } extern __inline__ __m128bh @@ -1134,8 +1134,8 @@ _mm_mask_getmant_pbh (__m128bh __W, __mmask8 __U, __m128bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m128bh) - __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B, - __W, __U); + __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B, + __W, __U); } extern __inline__ __m128bh @@ -1145,36 +1145,36 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, _MM_MANTISSA_SIGN_ENUM __C) { return (__m128bh) - __builtin_ia32_getmantpbf16128_mask (__A, (int) (__C << 2) | __B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_getmantbf16128_mask (__A, (int) (__C << 2) | __B, + (__v8bf) _mm_setzero_si128 (), + __U); } #else #define _mm256_getmant_pbh(A, B, C) \ - 
(__builtin_ia32_getmantpbf16256_mask ((A), (int)(((C)<<2) | (B)), \ - (__v16bf) _mm256_setzero_si256 (), \ - (__mmask16) (-1))) + (__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \ + (__v16bf) _mm256_setzero_si256 (), \ + (__mmask16) (-1))) #define _mm256_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantpbf16256_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + (__builtin_ia32_getmantbf16256_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) #define _mm256_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantpbf16256_mask ((B), (int)(((C)<<2) | (D)), \ - (__v16bf) _mm256_setzero_si256 (), \ - (A))) + (__builtin_ia32_getmantbf16256_mask ((B), (int)(((C)<<2) | (D)), \ + (__v16bf) _mm256_setzero_si256 (), \ + (A))) #define _mm_getmant_pbh(A, B, C) \ - (__builtin_ia32_getmantpbf16128_mask ((A), (int)(((C)<<2) | (B)), \ - (__v8bf) _mm_setzero_si128 (), \ - (__mmask8) (-1))) + (__builtin_ia32_getmantbf16128_mask ((A), (int)(((C)<<2) | (B)), \ + (__v8bf) _mm_setzero_si128 (), \ + (__mmask8) (-1))) #define _mm_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantpbf16128_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + (__builtin_ia32_getmantbf16128_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) #define _mm_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantpbf16128_mask ((B), (int)(((C)<<2) | (D)), \ - (__v8bf) _mm_setzero_si128 (), (A))) + (__builtin_ia32_getmantbf16128_mask ((B), (int)(((C)<<2) | (D)), \ + (__v8bf) _mm_setzero_si128 (), (A))) #endif /* __OPTIMIZE__ */ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 3d51cb6..17f1c17 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3269,15 +3269,15 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rcpbf16_v8bf_mask, "__b BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getexppbf16_v32bf_mask, "__builtin_ia32_getexppbf16512_mask", IX86_BUILTIN_GETEXPPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getexppbf16_v16bf_mask, "__builtin_ia32_getexppbf16256_mask", IX86_BUILTIN_GETEXPPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getexppbf16_v8bf_mask, "__builtin_ia32_getexppbf16128_mask", IX86_BUILTIN_GETEXPPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rndscalenepbf16_v32bf_mask, "__builtin_ia32_rndscalenepbf16512_mask", IX86_BUILTIN_RNDSCALENEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalenepbf16_v16bf_mask, "__builtin_ia32_rndscalenepbf16256_mask", IX86_BUILTIN_RNDSCALENEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalenepbf16_v8bf_mask, "__builtin_ia32_rndscalenepbf16128_mask", IX86_BUILTIN_RNDSCALENEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_reducenepbf16_v32bf_mask, "__builtin_ia32_reducenepbf16512_mask", IX86_BUILTIN_REDUCENEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducenepbf16_v16bf_mask, "__builtin_ia32_reducenepbf16256_mask", IX86_BUILTIN_REDUCENEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducenepbf16_v8bf_mask, 
"__builtin_ia32_reducenepbf16128_mask", IX86_BUILTIN_REDUCENEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getmantpbf16_v32bf_mask, "__builtin_ia32_getmantpbf16512_mask", IX86_BUILTIN_GETMANTPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantpbf16_v16bf_mask, "__builtin_ia32_getmantpbf16256_mask", IX86_BUILTIN_GETMANTPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantpbf16_v8bf_mask, "__builtin_ia32_getmantpbf16128_mask", IX86_BUILTIN_GETMANTPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rndscalebf16_v32bf_mask, "__builtin_ia32_rndscalebf16512_mask", IX86_BUILTIN_RNDSCALEBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalebf16_v16bf_mask, "__builtin_ia32_rndscalebf16256_mask", IX86_BUILTIN_RNDSCALEBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rndscalebf16_v8bf_mask, "__builtin_ia32_rndscalebf16128_mask", IX86_BUILTIN_RNDSCALEBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_reducebf16_v32bf_mask, "__builtin_ia32_reducebf16512_mask", IX86_BUILTIN_REDUCEBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducebf16_v16bf_mask, "__builtin_ia32_reducebf16256_mask", IX86_BUILTIN_REDUCEBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_reducebf16_v8bf_mask, "__builtin_ia32_reducebf16128_mask", IX86_BUILTIN_REDUCEBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_getmantbf16_v32bf_mask, "__builtin_ia32_getmantbf16512_mask", IX86_BUILTIN_GETMANTBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_INT_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantbf16_v16bf_mask, "__builtin_ia32_getmantbf16256_mask", IX86_BUILTIN_GETMANTBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_INT_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_getmantbf16_v8bf_mask, "__builtin_ia32_getmantbf16128_mask", IX86_BUILTIN_GETMANTBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_INT_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fpclasspbf16_v32bf_mask, "__builtin_ia32_fpclasspbf16512_mask", IX86_BUILTIN_FPCLASSPBF16512_MASK, UNKNOWN, (int) SI_FTYPE_V32BF_INT_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v16bf_mask, "__builtin_ia32_fpclasspbf16256_mask", IX86_BUILTIN_FPCLASSPBF16256_MASK, UNKNOWN, (int) HI_FTYPE_V16BF_INT_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclasspbf16_v8bf_mask, "__builtin_ia32_fpclasspbf16128_mask", IX86_BUILTIN_FPCLASSPBF16128_MASK, UNKNOWN, (int) QI_FTYPE_V8BF_INT_UQI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 44f4e18..7f84498 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -230,9 +230,9 @@ UNSPEC_VCVTNEPH2HF8S UNSPEC_VCVTHF82PH UNSPEC_VSCALEFPBF16 - UNSPEC_VRNDSCALENEPBF16 - UNSPEC_VREDUCENEPBF16 - UNSPEC_VGETMANTPBF16 + UNSPEC_VRNDSCALEBF16 + UNSPEC_VREDUCEBF16 + UNSPEC_VGETMANTBF16 UNSPEC_VFPCLASSPBF16 UNSPEC_VCOMSBF16 UNSPEC_VCVTNEBF162IBS @@ 
-32407,23 +32407,23 @@ [(set_attr "prefix" "evex")]) (define_int_iterator BF16IMMOP - [UNSPEC_VRNDSCALENEPBF16 - UNSPEC_VREDUCENEPBF16 - UNSPEC_VGETMANTPBF16]) + [UNSPEC_VRNDSCALEBF16 + UNSPEC_VREDUCEBF16 + UNSPEC_VGETMANTBF16]) (define_int_attr bf16immop - [(UNSPEC_VRNDSCALENEPBF16 "rndscalene") - (UNSPEC_VREDUCENEPBF16 "reducene") - (UNSPEC_VGETMANTPBF16 "getmant")]) + [(UNSPEC_VRNDSCALEBF16 "rndscale") + (UNSPEC_VREDUCEBF16 "reduce") + (UNSPEC_VGETMANTBF16 "getmant")]) -(define_insn "avx10_2_pbf16_" +(define_insn "avx10_2_bf16_" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (unspec:VBF_AVX10_2 [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] BF16IMMOP))] "TARGET_AVX10_2_256" - "vpbf16\t{%2, %1, %0|%0, %1, %2}" + "vbf16\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_fpclasspbf16_" diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 36e95a7..ba2a2bb 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -1017,19 +1017,19 @@ #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8) /* avx10_2-512bf16intrin.h */ -#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D) -#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D) -#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D) +#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D) +#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C) #define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D) /* avx10_2bf16intrin.h */ -#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D) -#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D) -#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D) -#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D) -#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D) -#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D) +#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D) +#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D) +#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D) +#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D) +#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D) #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C) #define 
__builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C) #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c index dd4d81e..df19413 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c @@ -49,15 +49,15 @@ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16z\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcmpbf16\[ \\t\]+\\\$1\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%k\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */ @@ -125,12 +125,12 @@ avx10_2_512_test 
(void) res = _mm512_mask_getexp_pbh (res, m32, x1); res = _mm512_maskz_getexp_pbh (m32, x1); - res = _mm512_roundscalene_pbh (x1, IMM); - res = _mm512_mask_roundscalene_pbh (res, m32, x1, IMM); - res = _mm512_maskz_roundscalene_pbh (m32, x1, IMM); - res = _mm512_reducene_pbh (x1, IMM); - res = _mm512_mask_reducene_pbh (res, m32, x1, IMM); - res = _mm512_maskz_reducene_pbh (m32, x1, IMM); + res = _mm512_roundscale_pbh (x1, IMM); + res = _mm512_mask_roundscale_pbh (res, m32, x1, IMM); + res = _mm512_maskz_roundscale_pbh (m32, x1, IMM); + res = _mm512_reduce_pbh (x1, IMM); + res = _mm512_mask_reduce_pbh (res, m32, x1, IMM); + res = _mm512_maskz_reduce_pbh (m32, x1, IMM); res = _mm512_getmant_pbh (x1, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); res = _mm512_mask_getmant_pbh (res, m32, x1, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantbf16-2.c new file mode 100644 index 0000000..0c58873 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantbf16-2.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 5.0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + src1.a[i] = 0.5; + float x = convert_bf16_to_fp32 (src1.a[i]); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (getmant (x)); + } + + res1.x = INTRINSIC (_getmant_pbh) (src1.x, _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_src); + res2.x = INTRINSIC (_mask_getmant_pbh) (res2.x, mask, src1.x, + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_src); + res3.x = INTRINSIC (_maskz_getmant_pbh) (mask, src1.x, + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_src); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c deleted file mode 100644 index 0c58873..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vgetmantpbf16-2.c +++ /dev/null @@ -1,51 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 5.0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - src1.a[i] = 0.5; - float x = convert_bf16_to_fp32 (src1.a[i]); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (getmant (x)); - } - - res1.x = INTRINSIC (_getmant_pbh) (src1.x, _MM_MANT_NORM_1_2, - _MM_MANT_SIGN_src); - res2.x = 
INTRINSIC (_mask_getmant_pbh) (res2.x, mask, src1.x, - _MM_MANT_NORM_1_2, - _MM_MANT_SIGN_src); - res3.x = INTRINSIC (_maskz_getmant_pbh) (mask, src1.x, - _MM_MANT_NORM_1_2, - _MM_MANT_SIGN_src); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducebf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducebf16-2.c new file mode 100644 index 0000000..1bfca41 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducebf16-2.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 5.0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float s = (float) (SIZE - 1) / (float) i; + src1.a[i] = convert_fp32_to_bf16 (s); + float x = convert_bf16_to_fp32 (src1.a[i]); + __m128 px = _mm_load_ss (&x); + __m128 mx = _mm_broadcastss_ps (px); + __m128 out = _mm_reduce_ps (mx, 0x10); + float res = _mm_cvtss_f32 (out); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); + } + + res1.x = INTRINSIC (_reduce_pbh) (src1.x, 0x10); + res2.x = INTRINSIC (_mask_reduce_pbh) (res2.x, mask, src1.x, 0x10); + res3.x = INTRINSIC (_maskz_reduce_pbh) (mask, src1.x, 0x10); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducenepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducenepbf16-2.c deleted file mode 100644 index 3c19dd2..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vreducenepbf16-2.c +++ /dev/null @@ -1,51 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 5.0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float s = (float) (SIZE - 1) / (float) i; - src1.a[i] = convert_fp32_to_bf16 (s); - float x = convert_bf16_to_fp32 (src1.a[i]); - __m128 px = _mm_load_ss (&x); - __m128 mx = _mm_broadcastss_ps (px); - __m128 out = _mm_reduce_ps (mx, 0x10); - float res = _mm_cvtss_f32 (out); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (res); - } - - res1.x = INTRINSIC (_reducene_pbh) (src1.x, 0x10); - res2.x = INTRINSIC (_mask_reducene_pbh) (res2.x, mask, src1.x, 
0x10); - res3.x = INTRINSIC (_maskz_reducene_pbh) (mask, src1.x, 0x10); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalebf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalebf16-2.c new file mode 100644 index 0000000..6f671d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalebf16-2.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 5.0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float s = (float) (SIZE - 1) / (float) i; + src1.a[i] = convert_fp32_to_bf16 (s); + float x = convert_bf16_to_fp32 (src1.a[i]); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rndscale (x)); + } + + res1.x = INTRINSIC (_roundscale_pbh) (src1.x, 0x10); + res2.x = INTRINSIC (_mask_roundscale_pbh) (res2.x, mask, src1.x, 0x10); + res3.x = INTRINSIC (_maskz_roundscale_pbh) (mask, src1.x, 0x10); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c deleted file mode 100644 index 9be6bca..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrndscalenepbf16-2.c +++ /dev/null @@ -1,47 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 5.0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float s = (float) (SIZE - 1) / (float) i; - src1.a[i] = convert_fp32_to_bf16 (s); - float x = convert_bf16_to_fp32 (src1.a[i]); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rndscale (x)); - } - - res1.x = INTRINSIC (_roundscalene_pbh) (src1.x, 0x10); - res2.x = INTRINSIC (_mask_roundscalene_pbh) (res2.x, mask, src1.x, 0x10); - res3.x = INTRINSIC (_maskz_roundscalene_pbh) (mask, src1.x, 0x10); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, 
res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c index a4841e5..74addd9 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c @@ -98,24 +98,24 @@ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vgetexppbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrndscalenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vreducenepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vgetmantpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final 
{ scan-assembler-times "vrndscalebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vreducebf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgetmantbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16y\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfpclasspbf16x\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ @@ -249,19 +249,19 @@ avx10_2_test (void) res1 = _mm_mask_getexp_pbh (res1, m8, x3); res1 = _mm_maskz_getexp_pbh (m8, x3); - res = _mm256_roundscalene_pbh (x1, IMM); - res = _mm256_mask_roundscalene_pbh (res, m16, x1, IMM); - res = _mm256_maskz_roundscalene_pbh (m16, x1, IMM); - res1 = _mm_roundscalene_pbh (x3, IMM); - res1 = _mm_mask_roundscalene_pbh (res1, m8, x3, IMM); - res1 = _mm_maskz_roundscalene_pbh (m8, x3, IMM); + res = _mm256_roundscale_pbh (x1, IMM); + res = _mm256_mask_roundscale_pbh (res, m16, x1, IMM); + res = _mm256_maskz_roundscale_pbh (m16, x1, IMM); + res1 = _mm_roundscale_pbh (x3, IMM); + res1 = _mm_mask_roundscale_pbh (res1, m8, x3, IMM); + res1 = _mm_maskz_roundscale_pbh (m8, x3, IMM); - res = _mm256_reducene_pbh (x1, IMM); - res = _mm256_mask_reducene_pbh (res, m16, x1, IMM); - res = _mm256_maskz_reducene_pbh (m16, x1, IMM); - res1 = _mm_reducene_pbh (x3, IMM); - res1 = _mm_mask_reducene_pbh (res1, m8, x3, IMM); - res1 = _mm_maskz_reducene_pbh (m8, x3, IMM); + res = _mm256_reduce_pbh (x1, IMM); + res = _mm256_mask_reduce_pbh (res, m16, x1, IMM); + res = _mm256_maskz_reduce_pbh (m16, x1, IMM); + res1 = _mm_reduce_pbh (x3, IMM); + res1 = _mm_mask_reduce_pbh (res1, m8, x3, IMM); + res1 = _mm_maskz_reduce_pbh (m8, x3, IMM); res = _mm256_getmant_pbh (x1, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src); res = _mm256_mask_getmant_pbh (res, m16, x1, _MM_MANT_NORM_p75_1p5, diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantbf16-2.c new file mode 100644 index 0000000..9cdec14 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vgetmantbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vgetmantbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantpbf16-2.c deleted file mode 100644 index 8fbb6e7..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vgetmantpbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vgetmantpbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vgetmantpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vreducebf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vreducebf16-2.c new file mode 100644 index 0000000..318e430 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vreducebf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vreducebf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vreducebf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vreducenepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vreducenepbf16-2.c deleted file mode 100644 index 9522c83..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vreducenepbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vreducenepbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vreducenepbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalebf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalebf16-2.c new file mode 100644 index 0000000..5720438 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalebf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vrndscalebf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vrndscalebf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalenepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalenepbf16-2.c deleted file mode 100644 index e27bf99..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vrndscalenepbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - 
-#define AVX10_2
-#define AVX512VL
-#define AVX512F_LEN 256
-#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vrndscalenepbf16-2.c"
-
-#undef AVX512F_LEN
-#undef AVX512F_LEN_HALF
-
-#define AVX512F_LEN 128
-#define AVX512F_LEN_HALF 128
-#include "avx10_2-512-vrndscalenepbf16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 830935e..73ed745 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1025,19 +1025,19 @@
 #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
 
 /* avx10_2-512bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D)
 #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
 #define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
 
 /* avx10_2bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D)
-#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D)
 #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
 #define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
 #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index f285ae2..16f03e2 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1390,12 +1390,12 @@ test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
 test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
 
 /* avx10_2-512bf16intrin.h */
-test_1 (_mm512_roundscalene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_roundscalene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_roundscalene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
-test_1 (_mm512_reducene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_reducene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_reducene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
 test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1)
 test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1)
 test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1)
@@ -1405,18 +1405,18 @@ test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1)
 test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1)
 
 /* avx10_2bf16intrin.h */
-test_1 (_mm256_roundscalene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_roundscalene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_roundscalene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_roundscalene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_roundscalene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_roundscalene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
-test_1 (_mm256_reducene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_reducene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_reducene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_reducene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_reducene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_reducene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
 test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1)
 test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1)
 test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 3f0b1c0..4f22fee 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -1429,12 +1429,12 @@ test_2 (_mm256_cvtx_round2ps_ph, __m256h, __m256, __m256, 4)
 test_2 (_mm512_cvtx_round2ps_ph, __m512h, __m512, __m512, 4)
 
 /* avx10_2-512bf16intrin.h */
-test_1 (_mm512_roundscalene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_roundscalene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_roundscalene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
-test_1 (_mm512_reducene_pbh, __m512bh, __m512bh, 123)
-test_2 (_mm512_maskz_reducene_pbh, __m512bh, __mmask32, __m512bh, 123)
-test_3 (_mm512_mask_reducene_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_roundscale_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_roundscale_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_roundscale_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
+test_1 (_mm512_reduce_pbh, __m512bh, __m512bh, 123)
+test_2 (_mm512_maskz_reduce_pbh, __m512bh, __mmask32, __m512bh, 123)
+test_3 (_mm512_mask_reduce_pbh, __m512bh, __m512bh, __mmask32, __m512bh, 123)
 test_1x (_mm512_getmant_pbh, __m512bh, __m512bh, 1, 1)
 test_2x (_mm512_maskz_getmant_pbh, __m512bh, __mmask32,__m512bh, 1, 1)
 test_3x (_mm512_mask_getmant_pbh, __m512bh, __m512bh, __mmask32,__m512bh, 1, 1)
@@ -1444,18 +1444,18 @@ test_2 (_mm512_cmp_pbh_mask, __mmask32, __m512bh, __m512bh, 1)
 test_3 (_mm512_mask_cmp_pbh_mask, __mmask32, __mmask32,__m512bh, __m512bh, 1)
 
 /* avx10_2bf16intrin.h */
-test_1 (_mm256_roundscalene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_roundscalene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_roundscalene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_roundscalene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_roundscalene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_roundscalene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
-test_1 (_mm256_reducene_pbh, __m256bh, __m256bh, 123)
-test_1 (_mm_reducene_pbh, __m128bh, __m128bh, 123)
-test_2 (_mm256_maskz_reducene_pbh, __m256bh, __mmask16, __m256bh, 123)
-test_2 (_mm_maskz_reducene_pbh, __m128bh, __mmask8, __m128bh, 123)
-test_3 (_mm256_mask_reducene_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
-test_3 (_mm_mask_reducene_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_roundscale_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_roundscale_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_roundscale_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_roundscale_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_roundscale_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_roundscale_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
+test_1 (_mm256_reduce_pbh, __m256bh, __m256bh, 123)
+test_1 (_mm_reduce_pbh, __m128bh, __m128bh, 123)
+test_2 (_mm256_maskz_reduce_pbh, __m256bh, __mmask16, __m256bh, 123)
+test_2 (_mm_maskz_reduce_pbh, __m128bh, __mmask8, __m128bh, 123)
+test_3 (_mm256_mask_reduce_pbh, __m256bh, __m256bh, __mmask16, __m256bh, 123)
+test_3 (_mm_mask_reduce_pbh, __m128bh, __m128bh, __mmask8, __m128bh, 123)
 test_1x (_mm256_getmant_pbh, __m256bh, __m256bh, 1, 1)
 test_1x (_mm_getmant_pbh, __m128bh, __m128bh, 1, 1)
 test_2x (_mm256_maskz_getmant_pbh, __m256bh, __mmask16,__m256bh, 1, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 7ebc4d0..428e4f5 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -999,19 +999,19 @@
 #define __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, E) __builtin_ia32_vcvt2ps2phx512_mask_round(A, B, C, D, 8)
 
 /* avx10_2-512bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16512_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16512_mask(A, B, C, D) __builtin_ia32_reducenepbf16512_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16512_mask(A, B, C, D) __builtin_ia32_getmantpbf16512_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16512_mask(A, B, C, D) __builtin_ia32_rndscalebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16512_mask(A, B, C, D) __builtin_ia32_reducebf16512_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16512_mask(A, B, C, D) __builtin_ia32_getmantbf16512_mask(A, 1, C, D)
 #define __builtin_ia32_fpclasspbf16512_mask(A, B, C) __builtin_ia32_fpclasspbf16512_mask(A, 1, C)
 #define __builtin_ia32_cmpbf16512_mask(A, B, C, D) __builtin_ia32_cmpbf16512_mask(A, B, 1, D)
 
 /* avx10_2bf16intrin.h */
-#define __builtin_ia32_rndscalenepbf16256_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_rndscalenepbf16128_mask(A, B, C, D) __builtin_ia32_rndscalenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16256_mask(A, B, C, D) __builtin_ia32_reducenepbf16256_mask(A, 123, C, D)
-#define __builtin_ia32_reducenepbf16128_mask(A, B, C, D) __builtin_ia32_reducenepbf16128_mask(A, 123, C, D)
-#define __builtin_ia32_getmantpbf16256_mask(A, B, C, D) __builtin_ia32_getmantpbf16256_mask(A, 1, C, D)
-#define __builtin_ia32_getmantpbf16128_mask(A, B, C, D) __builtin_ia32_getmantpbf16128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalebf16256_mask(A, B, C, D) __builtin_ia32_rndscalebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_rndscalebf16128_mask(A, B, C, D) __builtin_ia32_rndscalebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16256_mask(A, B, C, D) __builtin_ia32_reducebf16256_mask(A, 123, C, D)
+#define __builtin_ia32_reducebf16128_mask(A, B, C, D) __builtin_ia32_reducebf16128_mask(A, 123, C, D)
+#define __builtin_ia32_getmantbf16256_mask(A, B, C, D) __builtin_ia32_getmantbf16256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantbf16128_mask(A, B, C, D) __builtin_ia32_getmantbf16128_mask(A, 1, C, D)
 #define __builtin_ia32_fpclasspbf16256_mask(A, B, C) __builtin_ia32_fpclasspbf16256_mask(A, 1, C)
 #define __builtin_ia32_fpclasspbf16128_mask(A, B, C) __builtin_ia32_fpclasspbf16128_mask(A, 1, C)
 #define __builtin_ia32_cmpbf16256_mask(A, B, C, D) __builtin_ia32_cmpbf16256_mask(A, B, 1, D)
--
cgit v1.1
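
Illustrative note (not part of the patch): a minimal sketch of how the renamed 256-bit intrinsics are called after this change, mirroring the calls exercised in avx10_2-bf16-1.c above.  The function name use_renamed_bf16_intrins is hypothetical; the intrinsic names, argument order, types and immediates are taken from the testsuite hunks, and the code assumes a GCC with this change, compiled roughly like the tests (-O2 -march=x86-64-v3 -mavx10.2).

/* Illustrative only -- not part of the patch.  */
#include <immintrin.h>

__m256bh
use_renamed_bf16_intrins (__m256bh x, __mmask16 m)
{
  __m256bh r;

  /* Formerly _mm256_roundscalene_pbh; per the scan-assembler patterns
     above this should now assemble to vrndscalebf16.  */
  r = _mm256_roundscale_pbh (x, 123);

  /* Formerly _mm256_mask_reducene_pbh; now expected to emit a masked
     vreducebf16.  */
  r = _mm256_mask_reduce_pbh (r, m, x, 123);

  /* The getmant intrinsic keeps its name; only the mnemonic changes
     to vgetmantbf16.  */
  r = _mm256_getmant_pbh (r, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);

  return r;
}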