From d4d5935f124ab72bb32d76ba8467aa2cdbc2a329 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 23 Jan 2025 09:52:03 +0800 Subject: i386: Change mnemonics from V[RSQRT,SCALEF,SQRTNE]PBF16 to V[RSQRT,SCALEF,SQRT]BF16 gcc/ChangeLog: PR target/118270 * config/i386/avx10_2-512bf16intrin.h: Change intrin and builtin name according to new mnemonics. * config/i386/avx10_2bf16intrin.h: Ditto. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/sse.md (UNSPEC_VSCALEFBF16): Rename from UNSPEC_VSCALEFPBF16. (avx10_2_scalefpbf16_): Rename to... (avx10_2_scalefbf16_): ...this. Change instruction name output. (avx10_2_rsqrtpbf16_): Rename to... (avx10_2_rsqrtbf16_): ...this. Change instruction name output. (avx10_2_sqrtnepbf16_): Rename to... (avx10_2_sqrtbf16_): ...this. Change instruction name output. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-512-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vscalefpbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vscalefbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-512-vsqrtbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-bf16-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-vrsqrtpbf16-2.c: Move to... * gcc.target/i386/avx10_2-vrsqrtbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vscalefpbf16-2.c: Move to... * gcc.target/i386/avx10_2-vscalefbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vsqrtnepbf16-2.c: Move to... * gcc.target/i386/avx10_2-vsqrtbf16-2.c: ...here. Adjust intrin call. 
--- gcc/config/i386/avx10_2-512bf16intrin.h | 46 +++++------ gcc/config/i386/avx10_2bf16intrin.h | 88 +++++++++++----------- gcc/config/i386/i386-builtin.def | 24 +++--- gcc/config/i386/sse.md | 16 ++-- gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c | 24 +++--- .../gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c | 48 ++++++++++++ .../gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c | 48 ------------ .../gcc.target/i386/avx10_2-512-vscalefbf16-2.c | 52 +++++++++++++ .../gcc.target/i386/avx10_2-512-vscalefpbf16-2.c | 52 ------------- .../gcc.target/i386/avx10_2-512-vsqrtbf16-2.c | 48 ++++++++++++ .../gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c | 48 ------------ gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c | 48 ++++++------ .../gcc.target/i386/avx10_2-vrsqrtbf16-2.c | 16 ++++ .../gcc.target/i386/avx10_2-vrsqrtpbf16-2.c | 16 ---- .../gcc.target/i386/avx10_2-vscalefbf16-2.c | 16 ++++ .../gcc.target/i386/avx10_2-vscalefpbf16-2.c | 16 ---- .../gcc.target/i386/avx10_2-vsqrtbf16-2.c | 16 ++++ .../gcc.target/i386/avx10_2-vsqrtnepbf16-2.c | 16 ---- 18 files changed, 319 insertions(+), 319 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vscalefbf16-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vsqrtbf16-2.c delete mode 100644 
gcc/testsuite/gcc.target/i386/avx10_2-vsqrtnepbf16-2.c (limited to 'gcc') diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h index 276a438..f60ac2c 100644 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ b/gcc/config/i386/avx10_2-512bf16intrin.h @@ -194,16 +194,16 @@ extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_scalef_pbh (__m512bh __A, __m512bh __B) { - return (__m512bh) __builtin_ia32_scalefpbf16512 (__A, __B); + return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) + __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_scalefpbf16512_mask (__A, __B, __W, __U); + __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); } extern __inline__ __m512bh @@ -211,9 +211,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh) - __builtin_ia32_scalefpbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_scalefbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); } extern __inline__ __m512bh @@ -361,9 +361,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_rsqrt_pbh (__m512bh __A) { return (__m512bh) - __builtin_ia32_rsqrtpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); } @@ -372,7 +372,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) { return (__m512bh) - __builtin_ia32_rsqrtpbf16512_mask (__A, __W, __U); + __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); } extern __inline__ __m512bh @@ 
-380,37 +380,37 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) { return (__m512bh) - __builtin_ia32_rsqrtpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sqrtne_pbh (__m512bh __A) +_mm512_sqrt_pbh (__m512bh __A) { return (__m512bh) - __builtin_ia32_sqrtnepbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sqrtne_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) { return (__m512bh) - __builtin_ia32_sqrtnepbf16512_mask (__A, __W, __U); + __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); } extern __inline__ __m512bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sqrtne_pbh (__mmask32 __U, __m512bh __A) +_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) { return (__m512bh) - __builtin_ia32_sqrtnepbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); } extern __inline__ __m512bh diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index 891df89..640e707 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -350,7 +350,7 @@ extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_pbh (__m256bh __A, __m256bh __B) { - return (__m256bh) __builtin_ia32_scalefpbf16256 (__A, __B); + return (__m256bh) __builtin_ia32_scalefbf16256 (__A, __B); } extern 
__inline__ __m256bh @@ -359,7 +359,7 @@ _mm256_mask_scalef_pbh (__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_scalefpbf16256_mask (__A, __B, __W, __U); + __builtin_ia32_scalefbf16256_mask (__A, __B, __W, __U); } extern __inline__ __m256bh @@ -367,16 +367,16 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_scalef_pbh (__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh) - __builtin_ia32_scalefpbf16256_mask (__A, __B, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_scalefbf16256_mask (__A, __B, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_scalef_pbh (__m128bh __A, __m128bh __B) { - return (__m128bh) __builtin_ia32_scalefpbf16128 (__A, __B); + return (__m128bh) __builtin_ia32_scalefbf16128 (__A, __B); } extern __inline__ __m128bh @@ -385,7 +385,7 @@ _mm_mask_scalef_pbh (__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_scalefpbf16128_mask (__A, __B, __W, __U); + __builtin_ia32_scalefbf16128_mask (__A, __B, __W, __U); } extern __inline__ __m128bh @@ -393,9 +393,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh) - __builtin_ia32_scalefpbf16128_mask (__A, __B, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_scalefbf16128_mask (__A, __B, + (__v8bf) _mm_setzero_si128 (), + __U); } extern __inline__ __m256bh @@ -682,9 +682,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rsqrt_pbh (__m256bh __A) { return (__m256bh) - __builtin_ia32_rsqrtpbf16256_mask (__A, - (__v16bf) _mm256_setzero_si256 (), - (__mmask16) -1); + __builtin_ia32_rsqrtbf16256_mask (__A, + (__v16bf) _mm256_setzero_si256 (), + (__mmask16) -1); } extern __inline__ __m256bh @@ -692,7 +692,7 @@ __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_rsqrt_pbh (__m256bh __W, __mmask16 __U, __m256bh __A) { return (__m256bh) - __builtin_ia32_rsqrtpbf16256_mask (__A, __W, __U); + __builtin_ia32_rsqrtbf16256_mask (__A, __W, __U); } extern __inline__ __m256bh @@ -700,9 +700,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_rsqrt_pbh (__mmask16 __U, __m256bh __A) { return (__m256bh) - __builtin_ia32_rsqrtpbf16256_mask (__A, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_rsqrtbf16256_mask (__A, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh @@ -710,9 +710,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt_pbh (__m128bh __A) { return (__m128bh) - __builtin_ia32_rsqrtpbf16128_mask (__A, - (__v8bf) _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_rsqrtbf16128_mask (__A, + (__v8bf) _mm_setzero_si128 (), + (__mmask8) -1); } extern __inline__ __m128bh @@ -720,7 +720,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_rsqrt_pbh (__m128bh __W, __mmask8 __U, __m128bh __A) { return (__m128bh) - __builtin_ia32_rsqrtpbf16128_mask (__A, __W, __U); + __builtin_ia32_rsqrtbf16128_mask (__A, __W, __U); } extern __inline__ __m128bh @@ -728,65 +728,65 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A) { return (__m128bh) - __builtin_ia32_rsqrtpbf16128_mask (__A, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_rsqrtbf16128_mask (__A, + (__v8bf) _mm_setzero_si128 (), + __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_sqrtne_pbh (__m256bh __A) +_mm256_sqrt_pbh (__m256bh __A) { return (__m256bh) - __builtin_ia32_sqrtnepbf16256_mask (__A, - (__v16bf) _mm256_setzero_si256 (), - (__mmask16) -1); + __builtin_ia32_sqrtbf16256_mask (__A, + (__v16bf) _mm256_setzero_si256 (), + (__mmask16) -1); } 
extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_sqrtne_pbh (__m256bh __W, __mmask16 __U, __m256bh __A) +_mm256_mask_sqrt_pbh (__m256bh __W, __mmask16 __U, __m256bh __A) { return (__m256bh) - __builtin_ia32_sqrtnepbf16256_mask (__A, __W, __U); + __builtin_ia32_sqrtbf16256_mask (__A, __W, __U); } extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_sqrtne_pbh (__mmask16 __U, __m256bh __A) +_mm256_maskz_sqrt_pbh (__mmask16 __U, __m256bh __A) { return (__m256bh) - __builtin_ia32_sqrtnepbf16256_mask (__A, - (__v16bf) _mm256_setzero_si256 (), - __U); + __builtin_ia32_sqrtbf16256_mask (__A, + (__v16bf) _mm256_setzero_si256 (), + __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_sqrtne_pbh (__m128bh __A) +_mm_sqrt_pbh (__m128bh __A) { return (__m128bh) - __builtin_ia32_sqrtnepbf16128_mask (__A, - (__v8bf) _mm_setzero_si128 (), - (__mmask8) -1); + __builtin_ia32_sqrtbf16128_mask (__A, + (__v8bf) _mm_setzero_si128 (), + (__mmask8) -1); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_sqrtne_pbh (__m128bh __W, __mmask8 __U, __m128bh __A) +_mm_mask_sqrt_pbh (__m128bh __W, __mmask8 __U, __m128bh __A) { return (__m128bh) - __builtin_ia32_sqrtnepbf16128_mask (__A, __W, __U); + __builtin_ia32_sqrtbf16128_mask (__A, __W, __U); } extern __inline__ __m128bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_sqrtne_pbh (__mmask8 __U, __m128bh __A) +_mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A) { return (__m128bh) - __builtin_ia32_sqrtnepbf16128_mask (__A, - (__v8bf) _mm_setzero_si128 (), - __U); + __builtin_ia32_sqrtbf16128_mask (__A, + (__v8bf) _mm_setzero_si128 (), + __U); } extern __inline__ __m256bh diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 17f1c17..a1a5a54 100644 --- 
a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3215,12 +3215,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf, "__buil BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v16bf_mask, "__builtin_ia32_minbf16256_mask", IX86_BUILTIN_MINBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf, "__builtin_ia32_minbf16128", IX86_BUILTIN_MINBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sminbf16_v8bf_mask, "__builtin_ia32_minbf16128_mask", IX86_BUILTIN_MINBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf, "__builtin_ia32_scalefpbf16512", IX86_BUILTIN_SCALEFPBF16512, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefpbf16_v32bf_mask, "__builtin_ia32_scalefpbf16512_mask", IX86_BUILTIN_SCALEFPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf, "__builtin_ia32_scalefpbf16256", IX86_BUILTIN_SCALEFPBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v16bf_mask, "__builtin_ia32_scalefpbf16256_mask", IX86_BUILTIN_SCALEFPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf, "__builtin_ia32_scalefpbf16128", IX86_BUILTIN_SCALEFPBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefpbf16_v8bf_mask, "__builtin_ia32_scalefpbf16128_mask", IX86_BUILTIN_SCALEFPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefbf16_v32bf, "__builtin_ia32_scalefbf16512", IX86_BUILTIN_SCALEFBF16512, 
UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_scalefbf16_v32bf_mask, "__builtin_ia32_scalefbf16512_mask", IX86_BUILTIN_SCALEFBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefbf16_v16bf, "__builtin_ia32_scalefbf16256", IX86_BUILTIN_SCALEFBF16256, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefbf16_v16bf_mask, "__builtin_ia32_scalefbf16256_mask", IX86_BUILTIN_SCALEFBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefbf16_v8bf, "__builtin_ia32_scalefbf16128", IX86_BUILTIN_SCALEFBF16128, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_scalefbf16_v8bf_mask, "__builtin_ia32_scalefbf16128_mask", IX86_BUILTIN_SCALEFBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_mask, "__builtin_ia32_fmaddbf16512_mask", IX86_BUILTIN_FMADDBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_mask3, "__builtin_ia32_fmaddbf16512_mask3", IX86_BUILTIN_FMADDBF16512_MASK3, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_fmaddbf16_v32bf_maskz, "__builtin_ia32_fmaddbf16512_maskz", IX86_BUILTIN_FMADDBF16512_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_V32BF_USI) @@ -3257,12 +3257,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v16bf_maskz, BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_mask, "__builtin_ia32_fnmsubbf16128_mask", IX86_BUILTIN_FNMSUBBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_mask3, 
"__builtin_ia32_fnmsubbf16128_mask3", IX86_BUILTIN_FNMSUBBF16128_MASK3, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fnmsubbf16_v8bf_maskz, "__builtin_ia32_fnmsubbf16128_maskz", IX86_BUILTIN_FNMSUBBF16128_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rsqrtpbf16_v32bf_mask, "__builtin_ia32_rsqrtpbf16512_mask", IX86_BUILTIN_RSQRTPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtpbf16_v16bf_mask, "__builtin_ia32_rsqrtpbf16256_mask", IX86_BUILTIN_RSQRTPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtpbf16_v8bf_mask, "__builtin_ia32_rsqrtpbf16128_mask", IX86_BUILTIN_RSQRTPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sqrtnepbf16_v32bf_mask, "__builtin_ia32_sqrtnepbf16512_mask", IX86_BUILTIN_SQRTNEPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sqrtnepbf16_v16bf_mask, "__builtin_ia32_sqrtnepbf16256_mask", IX86_BUILTIN_SQRTNEPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sqrtnepbf16_v8bf_mask, "__builtin_ia32_sqrtnepbf16128_mask", IX86_BUILTIN_SQRTNEPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rsqrtbf16_v32bf_mask, "__builtin_ia32_rsqrtbf16512_mask", IX86_BUILTIN_RSQRTBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtbf16_v16bf_mask, "__builtin_ia32_rsqrtbf16256_mask", IX86_BUILTIN_RSQRTBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rsqrtbf16_v8bf_mask, "__builtin_ia32_rsqrtbf16128_mask", 
IX86_BUILTIN_RSQRTBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_sqrtbf16_v32bf_mask, "__builtin_ia32_sqrtbf16512_mask", IX86_BUILTIN_SQRTBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sqrtbf16_v16bf_mask, "__builtin_ia32_sqrtbf16256_mask", IX86_BUILTIN_SQRTBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_sqrtbf16_v8bf_mask, "__builtin_ia32_sqrtbf16128_mask", IX86_BUILTIN_SQRTBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_rcpbf16_v32bf_mask, "__builtin_ia32_rcpbf16512_mask", IX86_BUILTIN_RCPBF16512_MASK, UNKNOWN, (int) V32BF_FTYPE_V32BF_V32BF_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rcpbf16_v16bf_mask, "__builtin_ia32_rcpbf16256_mask", IX86_BUILTIN_RCPBF16256_MASK, UNKNOWN, (int) V16BF_FTYPE_V16BF_V16BF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_rcpbf16_v8bf_mask, "__builtin_ia32_rcpbf16128_mask", IX86_BUILTIN_RCPBF16128_MASK, UNKNOWN, (int) V8BF_FTYPE_V8BF_V8BF_UQI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7f84498..1cda627 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -229,7 +229,7 @@ UNSPEC_VCVTNEPH2HF8 UNSPEC_VCVTNEPH2HF8S UNSPEC_VCVTHF82PH - UNSPEC_VSCALEFPBF16 + UNSPEC_VSCALEFBF16 UNSPEC_VRNDSCALEBF16 UNSPEC_VREDUCEBF16 UNSPEC_VGETMANTBF16 @@ -32075,14 +32075,14 @@ "vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}" [(set_attr "prefix" "evex")]) -(define_insn "avx10_2_scalefpbf16_" +(define_insn "avx10_2_scalefbf16_" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (unspec:VBF_AVX10_2 [(match_operand:VBF_AVX10_2 1 "register_operand" "v") (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")] - UNSPEC_VSCALEFPBF16))] + UNSPEC_VSCALEFBF16))] "TARGET_AVX10_2_256" - "vscalefpbf16\t{%2, %1, %0|%0, %1, 
%2}" + "vscalefbf16\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex")]) (define_expand "3" @@ -32371,21 +32371,21 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "avx10_2_rsqrtpbf16_" +(define_insn "avx10_2_rsqrtbf16_" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (unspec:VBF_AVX10_2 [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT))] "TARGET_AVX10_2_256" - "vrsqrtpbf16\t{%1, %0|%0, %1}" + "vrsqrtbf16\t{%1, %0|%0, %1}" [(set_attr "prefix" "evex")]) -(define_insn "avx10_2_sqrtnepbf16_" +(define_insn "avx10_2_sqrtbf16_" [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") (sqrt:VBF_AVX10_2 (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2_256" - "vsqrtnepbf16\t{%1, %0|%0, %1}" + "vsqrtbf16\t{%1, %0|%0, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_rcpbf16_" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c index df19413..c7d47b3 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf16-1.c @@ -18,9 +18,9 @@ /* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ 
\\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -37,12 +37,12 @@ /* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub231bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times 
"vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -115,9 +115,9 @@ avx10_2_512_test (void) res = _mm512_rsqrt_pbh (x1); res = _mm512_mask_rsqrt_pbh (res, m32, x1); res = _mm512_maskz_rsqrt_pbh (m32, x1); - res = _mm512_sqrtne_pbh (x1); - res = _mm512_mask_sqrtne_pbh (res, m32, x1); - res = _mm512_maskz_sqrtne_pbh (m32, x1); + res = 
_mm512_sqrt_pbh (x1); + res = _mm512_mask_sqrt_pbh (res, m32, x1); + res = _mm512_maskz_sqrt_pbh (m32, x1); res = _mm512_rcp_pbh (x1); res = _mm512_mask_rcp_pbh (res, m32, x1); res = _mm512_maskz_rcp_pbh (m32, x1); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c new file mode 100644 index 0000000..3858c1c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtbf16-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float s1 = 2.0; + float rs = 1.0 / sqrtf (s1); + src1.a[i] = convert_fp32_to_bf16 (s1); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (rs); + } + + res1.x = INTRINSIC (_rsqrt_pbh) (src1.x); + res2.x = INTRINSIC (_mask_rsqrt_pbh) (res2.x, mask, src1.x); + res3.x = INTRINSIC (_maskz_rsqrt_pbh) (mask, src1.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c deleted file mode 100644 index 3858c1c..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vrsqrtpbf16-2.c +++ /dev/null @@ -1,48 +0,0 @@ -/* { dg-do
run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#include <math.h> -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float s1 = 2.0; - float rs = 1.0 / sqrtf (s1); - src1.a[i] = convert_fp32_to_bf16 (s1); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (rs); - } - - res1.x = INTRINSIC (_rsqrt_pbh) (src1.x); - res2.x = INTRINSIC (_mask_rsqrt_pbh) (res2.x, mask, src1.x); - res3.x = INTRINSIC (_maskz_rsqrt_pbh) (mask, src1.x); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefbf16-2.c new file mode 100644 index 0000000..f3f588d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefbf16-2.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i <
SIZE; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float x = (float) (2 * (i % 7) + 7); + float y = 1.0 + (float) (4 * i) / (float) SIZE; + float xx, yy, res; + src2.a[i] = convert_fp32_to_bf16 (y); + src1.a[i] = convert_fp32_to_bf16 (x); + xx = convert_bf16_to_fp32 (src1.a[i]); + yy = convert_bf16_to_fp32 (src2.a[i]); + res = scalef (xx, yy); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (res); + } + + res1.x = INTRINSIC (_scalef_pbh) (src1.x, src2.x); + res2.x = INTRINSIC (_mask_scalef_pbh) (res2.x, mask, src1.x, src2.x); + res3.x = INTRINSIC (_maskz_scalef_pbh) (mask, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c deleted file mode 100644 index f3f588d..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vscalefpbf16-2.c +++ /dev/null @@ -1,52 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float x = (float) (2 * (i % 7) + 7); - float y = 1.0 + (float) (4 * i) / (float) SIZE; - float xx, yy, res; - src2.a[i] = convert_fp32_to_bf16 (y); - src1.a[i] = 
convert_fp32_to_bf16 (x); - xx = convert_bf16_to_fp32 (src1.a[i]); - yy = convert_bf16_to_fp32 (src2.a[i]); - res = scalef (xx, yy); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16 (res); - } - - res1.x = INTRINSIC (_scalef_pbh) (src1.x, src2.x); - res2.x = INTRINSIC (_mask_scalef_pbh) (res2.x, mask, src1.x, src2.x); - res3.x = INTRINSIC (_maskz_scalef_pbh) (mask, src1.x, src2.x); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtbf16-2.c new file mode 100644 index 0000000..09d87ec --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtbf16-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif +#include "avx10-helper.h" +#include <math.h> +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; + MASK_TYPE mask = MASK_VALUE; + unsigned short res_ref[SIZE], res_ref2[SIZE]; + + for (i = 0; i < SIZE; i++) + { + res1.a[i] = 0; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + float s1 = i + 1.0; + float rs = sqrtf (s1); + src1.a[i] = convert_fp32_to_bf16_ne (s1); + res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rs); + } + + res1.x = INTRINSIC (_sqrt_pbh) (src1.x); + res2.x = INTRINSIC (_mask_sqrt_pbh) (res2.x, mask, src1.x); + res3.x = INTRINSIC (_maskz_sqrt_pbh) (mask, src1.x); + + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) + abort (); + + MASK_MERGE (bf16_uw) (res_ref2, mask, 
SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) + abort (); + + MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c deleted file mode 100644 index 40b085f..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vsqrtnepbf16-2.c +++ /dev/null @@ -1,48 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif -#include "avx10-helper.h" -#include <math.h> -#define SIZE (AVX512F_LEN / 16) -#include "avx512f-mask-type.h" - -void -TEST (void) -{ - int i; - UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1; - MASK_TYPE mask = MASK_VALUE; - unsigned short res_ref[SIZE], res_ref2[SIZE]; - - for (i = 0; i < SIZE; i++) - { - res1.a[i] = 0; - res2.a[i] = DEFAULT_VALUE; - res3.a[i] = DEFAULT_VALUE; - float s1 = i + 1.0; - float rs = sqrtf (s1); - src1.a[i] = convert_fp32_to_bf16_ne (s1); - res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rs); - } - - res1.x = INTRINSIC (_sqrtne_pbh) (src1.x); - res2.x = INTRINSIC (_mask_sqrtne_pbh) (res2.x, mask, src1.x); - res3.x = INTRINSIC (_maskz_sqrtne_pbh) (mask, src1.x); - - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref)) - abort (); - - MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2)) - abort (); - - MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c index 74addd9..1e89c5c 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf16-1.c @@ -36,12 +36,12 @@ /* { dg-final { 
scan-assembler-times "vminbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vminbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vscalefpbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ 
\\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefbf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd132bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmadd231bf16\[ \\t\]+%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -74,18 +74,18 @@ /* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub231bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfnmsub132bf16\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vrsqrtpbf16\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vsqrtnepbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vrsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* 
{ dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vsqrtbf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcpbf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ @@ -228,12 +228,12 @@ avx10_2_test (void) res1 = _mm_mask_rsqrt_pbh (res1, m8, x3); res1 = _mm_maskz_rsqrt_pbh (m8, x3); - res = _mm256_sqrtne_pbh (x1); - res = _mm256_mask_sqrtne_pbh (res, m16, x1); - res = _mm256_maskz_sqrtne_pbh (m16, x1); - res1 = _mm_sqrtne_pbh (x3); - res1 = _mm_mask_sqrtne_pbh (res1, m8, x3); - res1 = _mm_maskz_sqrtne_pbh (m8, x3); + res = _mm256_sqrt_pbh (x1); + res = _mm256_mask_sqrt_pbh (res, m16, x1); + res = _mm256_maskz_sqrt_pbh (m16, x1); + res1 = _mm_sqrt_pbh (x3); + res1 = _mm_mask_sqrt_pbh (res1, m8, x3); + res1 = _mm_maskz_sqrt_pbh (m8, x3); res = _mm256_rcp_pbh (x1); res = _mm256_mask_rcp_pbh (res, m16, x1); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtbf16-2.c new file mode 100644 index 0000000..6083c86 --- /dev/null 
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vrsqrtbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vrsqrtbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtpbf16-2.c deleted file mode 100644 index be0f561..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vrsqrtpbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vrsqrtpbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vrsqrtpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vscalefbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefbf16-2.c new file mode 100644 index 0000000..81b24f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vscalefbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vscalefbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c deleted file mode 100644 index 02753f7..0000000 --- 
a/gcc/testsuite/gcc.target/i386/avx10_2-vscalefpbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vscalefpbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vscalefpbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtbf16-2.c new file mode 100644 index 0000000..5188e05 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtbf16-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vsqrtbf16-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vsqrtbf16-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtnepbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtnepbf16-2.c deleted file mode 100644 index 4d0e836..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vsqrtnepbf16-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vsqrtnepbf16-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vsqrtnepbf16-2.c" -- cgit v1.1