From f1056463cb4c7950fc1bada6485c14df71ea3dd7 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 23 Jan 2025 09:52:20 +0800 Subject: i386: Change mnemonics from VCVTNEPH2[B,H]F8 to VCVTPH2[B,H]F8 gcc/ChangeLog: PR target/118270 * config/i386/avx10_2-512convertintrin.h: Change intrin and builtin name according to new mnemonics. * config/i386/avx10_2convertintrin.h: Ditto. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/sse.md (UNSPEC_VCVTPH2BF8): Rename from UNSPEC_VCVTNEPH2BF8. (UNSPEC_VCVTPH2BF8S): Rename from UNSPEC_VCVTNEPH2BF8S. (UNSPEC_VCVTPH2HF8): Rename from UNSPEC_VCVTNEPH2HF8. (UNSPEC_VCVTPH2HF8S): Rename from UNSPEC_VCVTNEPH2HF8S. (UNSPEC_CONVERTPH2FP8): Rename from UNSPEC_NECONVERTPH2FP8. Adjust UNSPEC name. (convertph2fp8): Rename from neconvertph2fp8. Adjust iterator map. (vcvt<neconvertph2fp8>v8hf): Rename to... (vcvt<convertph2fp8>v8hf): ...this. (*vcvt<neconvertph2fp8>v8hf): Rename to... (*vcvt<convertph2fp8>v8hf): ...this. (vcvt<neconvertph2fp8>v8hf_mask): Rename to... (vcvt<convertph2fp8>v8hf_mask): ...this. (*vcvt<neconvertph2fp8>v8hf_mask): Rename to... (*vcvt<convertph2fp8>v8hf_mask): ...this. (vcvt<neconvertph2fp8><mode><mask_name>): Rename to... (vcvt<convertph2fp8><mode><mask_name>): ...this. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-512-convert-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-convert-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-vcvtneph2bf8-2.c: Move to... * gcc.target/i386/avx10_2-vcvtph2bf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c: Move to... 
* gcc.target/i386/avx10_2-vcvtph2bf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vcvtneph2hf8-2.c: Move to... * gcc.target/i386/avx10_2-vcvtph2hf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c: Move to... * gcc.target/i386/avx10_2-vcvtph2hf8s-2.c: ...here. Adjust intrin call. --- gcc/config/i386/avx10_2-512convertintrin.h | 112 +++++------ gcc/config/i386/avx10_2convertintrin.h | 224 ++++++++++----------- gcc/config/i386/i386-builtin.def | 24 +-- gcc/config/i386/sse.md | 50 ++--- .../gcc.target/i386/avx10_2-512-convert-1.c | 56 +++--- .../gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c | 76 ------- .../gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c | 76 ------- .../gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c | 76 ------- .../gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c | 76 ------- .../gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c | 76 +++++++ .../gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c | 76 +++++++ .../gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c | 76 +++++++ .../gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c | 76 +++++++ gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c | 104 +++++----- .../gcc.target/i386/avx10_2-vcvtneph2bf8-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtneph2hf8-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtph2bf8-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvtph2bf8s-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvtph2hf8-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvtph2hf8s-2.c | 16 ++ 22 files changed, 653 insertions(+), 653 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c create mode 100644 
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8s-2.c (limited to 'gcc') diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h index c753dd7..5c64b9f 100644 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ b/gcc/config/i386/avx10_2-512convertintrin.h @@ -426,118 +426,118 @@ _mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtneph_pbf8 (__m512h __A) +_mm512_cvtph_bf8 (__m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtneph_pbf8 (__m256i __W, __mmask32 __U, __m512h __A) +_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); + return 
(__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtneph_pbf8 (__mmask32 __U, __m512h __A) +_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtnesph_pbf8 (__m512h __A) +_mm512_cvtsph_bf8 (__m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtnesph_pbf8 (__m256i __W, __mmask32 __U, __m512h __A) +_mm512_mask_cvtsph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtnesph_pbf8 (__mmask32 __U, __m512h __A) +_mm512_maskz_cvtsph_bf8 (__mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); 
} extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtneph_phf8 (__m512h __A) +_mm512_cvtph_hf8 (__m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtneph_phf8 (__m256i __W, __mmask32 __U, __m512h __A) +_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A, - (__v32qi)(__m256i) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi)(__m256i) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtneph_phf8 (__mmask32 __U, __m512h __A) +_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtnesph_phf8 (__m512h __A) +_mm512_cvtsph_hf8 (__m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtnesph_phf8 (__m256i __W, __mmask32 __U, 
__m512h __A) +_mm512_mask_cvtsph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtnesph_phf8 (__mmask32 __U, __m512h __A) +_mm512_maskz_cvtsph_hf8 (__mmask32 __U, __m512h __A) { - return (__m256i) __builtin_ia32_vcvtneph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline __m512h diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index 45d2bff..8635566 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -740,234 +740,234 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A) extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtneph_pbf8 (__m128h __A) +_mm_cvtph_bf8 (__m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vcvtph2bf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtneph_pbf8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm_mask_cvtph_bf8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) __W, + (__mmask8) __U); } 
extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtneph_pbf8 (__mmask8 __U, __m128h __A) +_mm_maskz_cvtph_bf8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtneph_pbf8 (__m256h __A) +_mm256_cvtph_bf8 (__m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvtph2bf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtneph_pbf8 (__m128i __W, __mmask16 __U, __m256h __A) +_mm256_mask_cvtph_bf8 (__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtneph_pbf8 (__mmask16 __U, __m256h __A) +_mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtnesph_pbf8 (__m128h __A) +_mm_cvtsph_bf8 (__m128h __A) 
{ - return (__m128i) __builtin_ia32_vcvtneph2bf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vcvtph2bf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtnesph_pbf8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm_mask_cvtsph_bf8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) __W, + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtnesph_pbf8 (__mmask8 __U, __m128h __A) +_mm_maskz_cvtsph_bf8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtnesph_pbf8 (__m256h __A) +_mm256_cvtsph_bf8 (__m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvtph2bf8s256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtnesph_pbf8 (__m128i __W, __mmask16 __U, __m256h __A) +_mm256_mask_cvtsph_bf8 (__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) __W, - (__mmask16) __U); + return 
(__m128i) __builtin_ia32_vcvtph2bf8s256_mask ((__v16hf) __A, + (__v16qi)(__m128i) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtnesph_pbf8 (__mmask16 __U, __m256h __A) +_mm256_maskz_cvtsph_bf8 (__mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2bf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2bf8s256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtneph_phf8 (__m128h __A) +_mm_cvtph_hf8 (__m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vcvtph2hf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtneph_phf8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm_mask_cvtph_hf8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) __W, + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtneph_phf8 (__mmask8 __U, __m128h __A) +_mm_maskz_cvtph_hf8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm256_cvtneph_phf8 (__m256h __A) +_mm256_cvtph_hf8 (__m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvtph2hf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtneph_phf8 (__m128i __W, __mmask16 __U, __m256h __A) +_mm256_mask_cvtph_hf8 (__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtneph_phf8 (__mmask16 __U, __m256h __A) +_mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtnesph_phf8 (__m128h __A) +_mm_cvtsph_hf8 (__m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vcvtph2hf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask8) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtnesph_phf8 (__m128i __W, __mmask8 __U, __m128h __A) +_mm_mask_cvtsph_hf8 (__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i) 
__builtin_ia32_vcvtneph2hf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) __W, - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) __W, + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtnesph_phf8 (__mmask8 __U, __m128h __A) +_mm_maskz_cvtsph_hf8 (__mmask8 __U, __m128h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8s128_mask ((__v8hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8s128_mask ((__v8hf) __A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask8) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtnesph_phf8 (__m256h __A) +_mm256_cvtsph_hf8 (__m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_undefined_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvtph2hf8s256_mask ((__v16hf) __A, + (__v16qi)(__m128i) + _mm_undefined_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtnesph_phf8 (__m128i __W, __mmask16 __U, __m256h __A) +_mm256_mask_cvtsph_hf8 (__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8s256_mask ((__v16hf) __A, + (__v16qi)(__m128i) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtnesph_phf8 (__mmask16 __U, __m256h __A) +_mm256_maskz_cvtsph_hf8 (__mmask16 __U, __m256h __A) { - return (__m128i) __builtin_ia32_vcvtneph2hf8s256_mask ((__v16hf) __A, - (__v16qi)(__m128i) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvtph2hf8s256_mask ((__v16hf) 
__A, + (__v16qi)(__m128i) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline __m128h diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index edd5c2b..5880f42 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3164,18 +3164,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2hf8v32hf_mask, "__built BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8sv8hf_mask, "__builtin_ia32_vcvt2ph2hf8s128_mask", IX86_BUILTIN_VCVT2PH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8sv16hf_mask, "__builtin_ia32_vcvt2ph2hf8s256_mask", IX86_BUILTIN_VCVT2PH2HF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2hf8sv32hf_mask, "__builtin_ia32_vcvt2ph2hf8s512_mask", IX86_BUILTIN_VCVT2PH2HF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v8hf_mask, "__builtin_ia32_vcvtneph2bf8128_mask", IX86_BUILTIN_VCVTNEPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v16hf_mask, "__builtin_ia32_vcvtneph2bf8256_mask", IX86_BUILTIN_VCVTNEPH2BF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2bf8v32hf_mask, "__builtin_ia32_vcvtneph2bf8512_mask", IX86_BUILTIN_VCVTNEPH2BF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8sv8hf_mask, "__builtin_ia32_vcvtneph2bf8s128_mask", IX86_BUILTIN_VCVTNEPH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8sv16hf_mask, "__builtin_ia32_vcvtneph2bf8s256_mask", IX86_BUILTIN_VCVTNEPH2BF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, 
CODE_FOR_vcvtneph2bf8sv32hf_mask, "__builtin_ia32_vcvtneph2bf8s512_mask", IX86_BUILTIN_VCVTNEPH2BF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8v8hf_mask, "__builtin_ia32_vcvtneph2hf8128_mask", IX86_BUILTIN_VCVTNEPH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8v16hf_mask, "__builtin_ia32_vcvtneph2hf8256_mask", IX86_BUILTIN_VCVTNEPH2HF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8v32hf_mask, "__builtin_ia32_vcvtneph2hf8512_mask", IX86_BUILTIN_VCVTNEPH2HF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8sv8hf_mask, "__builtin_ia32_vcvtneph2hf8s128_mask", IX86_BUILTIN_VCVTNEPH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2hf8sv16hf_mask, "__builtin_ia32_vcvtneph2hf8s256_mask", IX86_BUILTIN_VCVTNEPH2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2hf8sv32hf_mask, "__builtin_ia32_vcvtneph2hf8s512_mask", IX86_BUILTIN_VCVTNEPH2HF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2bf8v8hf_mask, "__builtin_ia32_vcvtph2bf8128_mask", IX86_BUILTIN_VCVTPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2bf8v16hf_mask, "__builtin_ia32_vcvtph2bf8256_mask", IX86_BUILTIN_VCVTPH2BF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtph2bf8v32hf_mask, "__builtin_ia32_vcvtph2bf8512_mask", IX86_BUILTIN_VCVTPH2BF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2bf8sv8hf_mask, "__builtin_ia32_vcvtph2bf8s128_mask", 
IX86_BUILTIN_VCVTPH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2bf8sv16hf_mask, "__builtin_ia32_vcvtph2bf8s256_mask", IX86_BUILTIN_VCVTPH2BF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtph2bf8sv32hf_mask, "__builtin_ia32_vcvtph2bf8s512_mask", IX86_BUILTIN_VCVTPH2BF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2hf8v8hf_mask, "__builtin_ia32_vcvtph2hf8128_mask", IX86_BUILTIN_VCVTPH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2hf8v16hf_mask, "__builtin_ia32_vcvtph2hf8256_mask", IX86_BUILTIN_VCVTPH2HF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtph2hf8v32hf_mask, "__builtin_ia32_vcvtph2hf8512_mask", IX86_BUILTIN_VCVTPH2HF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2hf8sv8hf_mask, "__builtin_ia32_vcvtph2hf8s128_mask", IX86_BUILTIN_VCVTPH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtph2hf8sv16hf_mask, "__builtin_ia32_vcvtph2hf8s256_mask", IX86_BUILTIN_VCVTPH2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtph2hf8sv32hf_mask, "__builtin_ia32_vcvtph2hf8s512_mask", IX86_BUILTIN_VCVTPH2HF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv8hf_mask, "__builtin_ia32_vcvthf82ph128_mask", IX86_BUILTIN_VCVTHF82PH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V16QI_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvthf82phv16hf_mask, "__builtin_ia32_vcvthf82ph256_mask", IX86_BUILTIN_VCVTHF82PH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16QI_V16HF_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, 
CODE_FOR_vcvthf82phv32hf_mask, "__builtin_ia32_vcvthf82ph512_mask", IX86_BUILTIN_VCVTHF82PH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32QI_V32HF_USI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 582942c..74fc141 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -224,10 +224,10 @@ UNSPEC_VCVT2PH2BF8S UNSPEC_VCVT2PH2HF8 UNSPEC_VCVT2PH2HF8S - UNSPEC_VCVTNEPH2BF8 - UNSPEC_VCVTNEPH2BF8S - UNSPEC_VCVTNEPH2HF8 - UNSPEC_VCVTNEPH2HF8S + UNSPEC_VCVTPH2BF8 + UNSPEC_VCVTPH2BF8S + UNSPEC_VCVTPH2HF8 + UNSPEC_VCVTPH2HF8S UNSPEC_VCVTHF82PH UNSPEC_VSCALEFBF16 UNSPEC_VRNDSCALEBF16 @@ -31837,45 +31837,45 @@ (define_mode_attr ph2fp8suff [(V32HF "") (V16HF "{y}") (V8HF "{x}")]) -(define_int_iterator UNSPEC_NECONVERTPH2FP8 - [UNSPEC_VCVTNEPH2BF8 UNSPEC_VCVTNEPH2BF8S - UNSPEC_VCVTNEPH2HF8 UNSPEC_VCVTNEPH2HF8S]) +(define_int_iterator UNSPEC_CONVERTPH2FP8 + [UNSPEC_VCVTPH2BF8 UNSPEC_VCVTPH2BF8S + UNSPEC_VCVTPH2HF8 UNSPEC_VCVTPH2HF8S]) -(define_int_attr neconvertph2fp8 - [(UNSPEC_VCVTNEPH2BF8 "neph2bf8") - (UNSPEC_VCVTNEPH2BF8S "neph2bf8s") - (UNSPEC_VCVTNEPH2HF8 "neph2hf8") - (UNSPEC_VCVTNEPH2HF8S "neph2hf8s")]) +(define_int_attr convertph2fp8 + [(UNSPEC_VCVTPH2BF8 "ph2bf8") + (UNSPEC_VCVTPH2BF8S "ph2bf8s") + (UNSPEC_VCVTPH2HF8 "ph2hf8") + (UNSPEC_VCVTPH2HF8S "ph2hf8s")]) -(define_expand "vcvtv8hf" +(define_expand "vcvtv8hf" [(set (match_operand:V16QI 0 "register_operand") (vec_concat:V16QI (unspec:V8QI [(match_operand:V8HF 1 "nonimmediate_operand")] - UNSPEC_NECONVERTPH2FP8) + UNSPEC_CONVERTPH2FP8) (match_dup 2)))] "TARGET_AVX10_2_256" "operands[2] = CONST0_RTX (V8QImode);") -(define_insn "*vcvtv8hf" +(define_insn "*vcvtv8hf" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (unspec:V8QI [(match_operand:V8HF 1 "nonimmediate_operand" "vm")] - UNSPEC_NECONVERTPH2FP8) + UNSPEC_CONVERTPH2FP8) (match_operand:V8QI 2 "const0_operand")))] "TARGET_AVX10_2_256" - "vcvt{x}\t{%1, %0|%0, %1}" + "vcvt{x}\t{%1, %0|%0, %1}" [(set_attr "prefix" 
"evex") (set_attr "mode" "HF")]) -(define_expand "vcvtv8hf_mask" +(define_expand "vcvtv8hf_mask" [(set (match_operand:V16QI 0 "register_operand") (vec_concat:V16QI (vec_merge:V8QI (unspec:V8QI [(match_operand:V8HF 1 "nonimmediate_operand")] - UNSPEC_NECONVERTPH2FP8) + UNSPEC_CONVERTPH2FP8) (vec_select:V8QI (match_operand:V16QI 2 "nonimm_or_0_operand") (parallel [(const_int 0) (const_int 1) @@ -31887,13 +31887,13 @@ "TARGET_AVX10_2_256" "operands[4] = CONST0_RTX (V8QImode);") -(define_insn "*vcvtv8hf_mask" +(define_insn "*vcvtv8hf_mask" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (vec_merge:V8QI (unspec:V8QI [(match_operand:V8HF 1 "nonimmediate_operand" "vm")] - UNSPEC_NECONVERTPH2FP8) + UNSPEC_CONVERTPH2FP8) (vec_select:V8QI (match_operand:V16QI 2 "nonimm_or_0_operand" "0C") (parallel [(const_int 0) (const_int 1) @@ -31903,16 +31903,16 @@ (match_operand:QI 3 "register_operand" "Yk")) (match_operand:V8QI 4 "const0_operand")))] "TARGET_AVX10_2_256" - "vcvt{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + "vcvt{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "prefix" "evex")]) -(define_insn "vcvt" +(define_insn "vcvt" [(set (match_operand: 0 "register_operand" "=v") (unspec: [(match_operand:VHF_256_512 1 "nonimmediate_operand" "vm")] - UNSPEC_NECONVERTPH2FP8))] + UNSPEC_CONVERTPH2FP8))] "TARGET_AVX10_2_256" - "vcvt\t{%1, %0|%0, %1}" + "vcvt\t{%1, %0|%0, %1}" [(set_attr "prefix" "evex")]) (define_insn "vcvthf82ph" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c index 955c862..58db35d 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c @@ -33,18 +33,18 @@ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } 
*/ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } 
*/ +/* { dg-final { scan-assembler-times "vcvtph2bf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8s\[ \\t\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %zmm\[0-9]\+, %zmm\[0-9]\+(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %zmm\[0-9]\+, %zmm\[0-9]\+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%zmm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */ @@ -148,35 +148,35 @@ avx10_2_512_vcvthf82ph_test (void) } void extern -avx10_2_512_vcvtneph2bf8_test (void) +avx10_2_512_vcvtph2bf8_test (void) { - x256i = _mm512_cvtneph_pbf8 (x512h); - x256i = _mm512_mask_cvtneph_pbf8 (x256i, m32, x512h); - x256i = _mm512_maskz_cvtneph_pbf8 (m32, x512h); + x256i = 
_mm512_cvtph_bf8 (x512h); + x256i = _mm512_mask_cvtph_bf8 (x256i, m32, x512h); + x256i = _mm512_maskz_cvtph_bf8 (m32, x512h); } void extern -avx10_2_512_vcvtneph2bf8s_test (void) +avx10_2_512_vcvtph2bf8s_test (void) { - x256i = _mm512_cvtnesph_pbf8 (x512h); - x256i = _mm512_mask_cvtnesph_pbf8 (x256i, m32, x512h); - x256i = _mm512_maskz_cvtnesph_pbf8 (m32, x512h); + x256i = _mm512_cvtsph_bf8 (x512h); + x256i = _mm512_mask_cvtsph_bf8 (x256i, m32, x512h); + x256i = _mm512_maskz_cvtsph_bf8 (m32, x512h); } void extern -avx10_2_512_vcvtneph2hf8_test (void) +avx10_2_512_vcvtph2hf8_test (void) { - x256i = _mm512_cvtneph_phf8 (x512h); - x256i = _mm512_mask_cvtneph_phf8 (x256i, m32, x512h); - x256i = _mm512_maskz_cvtneph_phf8 (m32, x512h); + x256i = _mm512_cvtph_hf8 (x512h); + x256i = _mm512_mask_cvtph_hf8 (x256i, m32, x512h); + x256i = _mm512_maskz_cvtph_hf8 (m32, x512h); } void extern -avx10_2_512_vcvtneph2hf8s_test (void) +avx10_2_512_vcvtph2hf8s_test (void) { - x256i = _mm512_cvtnesph_phf8 (x512h); - x256i = _mm512_mask_cvtnesph_phf8 (x256i, m32, x512h); - x256i = _mm512_maskz_cvtnesph_phf8 (m32, x512h); + x256i = _mm512_cvtsph_hf8 (x512h); + x256i = _mm512_mask_cvtsph_hf8 (x256i, m32, x512h); + x256i = _mm512_maskz_cvtsph_hf8 (m32, x512h); } void extern diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c deleted file mode 100644 index 96ca7e8..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c +++ /dev/null @@ -1,76 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE (AVX512F_LEN / 16) -#define SIZE_DST (AVX512F_LEN_HALF / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s) -{ - int i, hf8_bf8, 
saturate; - - hf8_bf8 = 1; - saturate = 0; - - for (i = 0; i < SIZE_DST; i++) - { - r[i] = 0; - if (i < SIZE) - { - Float16Union usrc = {.f16 = s[i]}; - r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); - } - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE_DST]; - - sign = 1; - for (i = 0; i < SIZE; i++) - { - src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - -#if AVX512F_LEN > 128 - for (i = 0; i < SIZE_DST; i++) - res2.a[i] = DEFAULT_VALUE; -#else - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; -#endif - - CALC(res_ref, src.a); - - res1.x = INTRINSIC (_cvtneph_pbf8) (src.x); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtneph_pbf8) (res2.x, mask, src.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtneph_pbf8) (mask, src.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c deleted file mode 100644 index c458f1e..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c +++ /dev/null @@ -1,76 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE (AVX512F_LEN / 16) -#define SIZE_DST (AVX512F_LEN_HALF / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s) -{ - int i, hf8_bf8, saturate; - - hf8_bf8 = 1; - saturate = 1; - - for (i = 0; i < 
SIZE_DST; i++) - { - r[i] = 0; - if (i < SIZE) - { - Float16Union usrc = {.f16 = s[i]}; - r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); - } - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE_DST]; - - sign = 1; - for (i = 0; i < SIZE; i++) - { - src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - -#if AVX512F_LEN > 128 - for (i = 0; i < SIZE_DST; i++) - res2.a[i] = DEFAULT_VALUE; -#else - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; -#endif - - CALC(res_ref, src.a); - - res1.x = INTRINSIC (_cvtnesph_pbf8) (src.x); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtnesph_pbf8) (res2.x, mask, src.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtnesph_pbf8) (mask, src.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c deleted file mode 100644 index cb9cdbb..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c +++ /dev/null @@ -1,76 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE (AVX512F_LEN / 16) -#define SIZE_DST (AVX512F_LEN_HALF / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s) -{ - int i, hf8_bf8, saturate; - - hf8_bf8 = 0; - saturate = 0; - - for (i = 0; i < SIZE_DST; i++) - { - r[i] = 0; - if (i < SIZE) - { - Float16Union usrc 
= {.f16 = s[i]}; - r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); - } - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE; i++) - { - src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - -#if AVX512F_LEN > 128 - for (i = 0; i < SIZE_DST; i++) - res2.a[i] = DEFAULT_VALUE; -#else - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; -#endif - - CALC(res_ref, src.a); - - res1.x = INTRINSIC (_cvtneph_phf8) (src.x); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtneph_phf8) (res2.x, mask, src.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtneph_phf8) (mask, src.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c deleted file mode 100644 index 4827af4..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c +++ /dev/null @@ -1,76 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE (AVX512F_LEN / 16) -#define SIZE_DST (AVX512F_LEN_HALF / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s) -{ - int i, hf8_bf8, saturate; - - hf8_bf8 = 0; - saturate = 1; - - for (i = 0; i < SIZE_DST; i++) - { - r[i] = 0; - if (i < SIZE) - { - Float16Union usrc = {.f16 = s[i]}; - r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, 
saturate); - } - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE; i++) - { - src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - -#if AVX512F_LEN > 128 - for (i = 0; i < SIZE_DST; i++) - res2.a[i] = DEFAULT_VALUE; -#else - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; -#endif - - CALC(res_ref, src.a); - - res1.x = INTRINSIC (_cvtnesph_phf8) (src.x); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtnesph_phf8) (res2.x, mask, src.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtnesph_phf8) (mask, src.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c new file mode 100644 index 0000000..189c2d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8-2.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE (AVX512F_LEN / 16) +#define SIZE_DST (AVX512F_LEN_HALF / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s) +{ + int i, hf8_bf8, saturate; + + hf8_bf8 = 1; + saturate = 0; + + for (i = 0; i < SIZE_DST; i++) + { + r[i] = 0; + if (i < SIZE) + { + Float16Union usrc = {.f16 = s[i]}; + r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); + } + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE 
(AVX512F_LEN_HALF, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE_DST]; + + sign = 1; + for (i = 0; i < SIZE; i++) + { + src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + +#if AVX512F_LEN > 128 + for (i = 0; i < SIZE_DST; i++) + res2.a[i] = DEFAULT_VALUE; +#else + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; +#endif + + CALC(res_ref, src.a); + + res1.x = INTRINSIC (_cvtph_bf8) (src.x); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvtph_bf8) (res2.x, mask, src.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvtph_bf8) (mask, src.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c new file mode 100644 index 0000000..090c4c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2bf8s-2.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE (AVX512F_LEN / 16) +#define SIZE_DST (AVX512F_LEN_HALF / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s) +{ + int i, hf8_bf8, saturate; + + hf8_bf8 = 1; + saturate = 1; + + for (i = 0; i < SIZE_DST; i++) + { + r[i] = 0; + if (i < SIZE) + { + Float16Union usrc = {.f16 = s[i]}; + r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); + } + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE 
mask = MASK_VALUE; + unsigned char res_ref[SIZE_DST]; + + sign = 1; + for (i = 0; i < SIZE; i++) + { + src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + +#if AVX512F_LEN > 128 + for (i = 0; i < SIZE_DST; i++) + res2.a[i] = DEFAULT_VALUE; +#else + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; +#endif + + CALC(res_ref, src.a); + + res1.x = INTRINSIC (_cvtsph_bf8) (src.x); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvtsph_bf8) (res2.x, mask, src.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvtsph_bf8) (mask, src.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c new file mode 100644 index 0000000..8cdb513 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8-2.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE (AVX512F_LEN / 16) +#define SIZE_DST (AVX512F_LEN_HALF / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s) +{ + int i, hf8_bf8, saturate; + + hf8_bf8 = 0; + saturate = 0; + + for (i = 0; i < SIZE_DST; i++) + { + r[i] = 0; + if (i < SIZE) + { + Float16Union usrc = {.f16 = s[i]}; + r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); + } + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE_DST]; /* CALC fills SIZE_DST bytes; SIZE is only half that when AVX512F_LEN is 128. */ + + sign = 1; + for (i = 0; i < SIZE;
i++) + { + src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + +#if AVX512F_LEN > 128 + for (i = 0; i < SIZE_DST; i++) + res2.a[i] = DEFAULT_VALUE; +#else + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; +#endif + + CALC(res_ref, src.a); + + res1.x = INTRINSIC (_cvtph_hf8) (src.x); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvtph_hf8) (res2.x, mask, src.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvtph_hf8) (mask, src.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c new file mode 100644 index 0000000..ded773e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtph2hf8s-2.c @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE (AVX512F_LEN / 16) +#define SIZE_DST (AVX512F_LEN_HALF / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s) +{ + int i, hf8_bf8, saturate; + + hf8_bf8 = 0; + saturate = 1; + + for (i = 0; i < SIZE_DST; i++) + { + r[i] = 0; + if (i < SIZE) + { + Float16Union usrc = {.f16 = s[i]}; + r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); + } + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE_DST]; /* CALC fills SIZE_DST bytes; SIZE is only half that when AVX512F_LEN is 128. */ + + sign = 1; + for (i = 0; i < SIZE; i++) + { + src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + +#if
AVX512F_LEN > 128 + for (i = 0; i < SIZE_DST; i++) + res2.a[i] = DEFAULT_VALUE; +#else + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; +#endif + + CALC(res_ref, src.a); + + res1.x = INTRINSIC (_cvtsph_hf8) (src.x); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvtsph_hf8) (res2.x, mask, src.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvtsph_hf8) (mask, src.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c index 49b3438..6bc6b18 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c @@ -63,30 +63,30 @@ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { 
scan-assembler-times "vcvtneph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 
} } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtneph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 
1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2bf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8x\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8y\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sx\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvtph2hf8sy\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%ymm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vpsllw\[ \t]\+\\\$8, %ymm\[0-9]\+, %ymm\[0-9]\+(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { 
scan-assembler-times "vpsllw\[ \t]\+\\\$8, %ymm\[0-9]\+, %ymm\[0-9]\+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -234,51 +234,51 @@ avx10_2_vcvthf82ph_test (void) } void extern -avx10_2_vcvtneph2bf8_test (void) +avx10_2_vcvtph2bf8_test (void) { - x128i = _mm_cvtneph_pbf8 (x128h); - x128i = _mm_mask_cvtneph_pbf8 (x128i, m8, x128h); - x128i = _mm_maskz_cvtneph_pbf8 (m8, x128h); + x128i = _mm_cvtph_bf8 (x128h); + x128i = _mm_mask_cvtph_bf8 (x128i, m8, x128h); + x128i = _mm_maskz_cvtph_bf8 (m8, x128h); - x128i = _mm256_cvtneph_pbf8 (x256h); - x128i = _mm256_mask_cvtneph_pbf8 (x128i, m16, x256h); - x128i = _mm256_maskz_cvtneph_pbf8 (m16, x256h); + x128i = _mm256_cvtph_bf8 (x256h); + x128i = _mm256_mask_cvtph_bf8 (x128i, m16, x256h); + x128i = _mm256_maskz_cvtph_bf8 (m16, x256h); } void extern -avx10_2_vcvtneph2bf8s_test (void) +avx10_2_vcvtph2bf8s_test (void) { - x128i = _mm_cvtnesph_pbf8 (x128h); - x128i = _mm_mask_cvtnesph_pbf8 (x128i, m8, x128h); - x128i = _mm_maskz_cvtnesph_pbf8 (m8, x128h); + x128i = _mm_cvtsph_bf8 (x128h); + x128i = _mm_mask_cvtsph_bf8 (x128i, m8, x128h); + x128i = _mm_maskz_cvtsph_bf8 (m8, x128h); - x128i = _mm256_cvtnesph_pbf8 (x256h); - x128i = _mm256_mask_cvtnesph_pbf8 (x128i, m16, x256h); - x128i = _mm256_maskz_cvtnesph_pbf8 (m16, x256h); + x128i = _mm256_cvtsph_bf8 (x256h); + x128i = _mm256_mask_cvtsph_bf8 (x128i, m16, x256h); + x128i = _mm256_maskz_cvtsph_bf8 (m16, x256h); } void extern -avx10_2_vcvtneph2hf8_test (void) +avx10_2_vcvtph2hf8_test (void) { - x128i = _mm_cvtneph_phf8 (x128h); - x128i = _mm_mask_cvtneph_phf8 (x128i, m8, x128h); - x128i = _mm_maskz_cvtneph_phf8 (m8, x128h); + x128i = _mm_cvtph_hf8 (x128h); + x128i = _mm_mask_cvtph_hf8 (x128i, m8, x128h); + x128i = _mm_maskz_cvtph_hf8 (m8, x128h); - x128i = _mm256_cvtneph_phf8 (x256h); - x128i = _mm256_mask_cvtneph_phf8 (x128i, m16, x256h); - x128i = _mm256_maskz_cvtneph_phf8 (m16, x256h); + x128i = _mm256_cvtph_hf8 (x256h); + x128i = _mm256_mask_cvtph_hf8 (x128i, m16, x256h); + 
x128i = _mm256_maskz_cvtph_hf8 (m16, x256h); } void extern -avx10_2_vcvtneph2hf8s_test (void) +avx10_2_vcvtph2hf8s_test (void) { - x128i = _mm_cvtnesph_phf8 (x128h); - x128i = _mm_mask_cvtnesph_phf8 (x128i, m8, x128h); - x128i = _mm_maskz_cvtnesph_phf8 (m8, x128h); + x128i = _mm_cvtsph_hf8 (x128h); + x128i = _mm_mask_cvtsph_hf8 (x128i, m8, x128h); + x128i = _mm_maskz_cvtsph_hf8 (m8, x128h); - x128i = _mm256_cvtnesph_phf8 (x256h); - x128i = _mm256_mask_cvtnesph_phf8 (x128i, m16, x256h); - x128i = _mm256_maskz_cvtnesph_phf8 (m16, x256h); + x128i = _mm256_cvtsph_hf8 (x256h); + x128i = _mm256_mask_cvtsph_hf8 (x128i, m16, x256h); + x128i = _mm256_maskz_cvtsph_hf8 (m16, x256h); } void extern diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c deleted file mode 100644 index b8b817a..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2bf8-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2bf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c deleted file mode 100644 index 7ce76cc..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2bf8s-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2bf8s-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include 
"avx10_2-512-vcvtneph2bf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c deleted file mode 100644 index 3d834eb..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2hf8-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2hf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c deleted file mode 100644 index 3e6ee83..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtneph2hf8s-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2hf8s-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtneph2hf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8-2.c new file mode 100644 index 0000000..826b5ff --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2bf8-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include 
"avx10_2-512-vcvtph2bf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8s-2.c new file mode 100644 index 0000000..c5b9576 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2bf8s-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2bf8s-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2bf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8-2.c new file mode 100644 index 0000000..00f2928 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2hf8-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2hf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8s-2.c new file mode 100644 index 0000000..a2fa0c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtph2hf8s-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2hf8s-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvtph2hf8s-2.c" -- 
cgit v1.1