From cfef82be8973c9dc481c96306ba3e2c342398e48 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 23 Jan 2025 09:52:16 +0800 Subject: i386: Change mnemonics from VCVTNE2PH2[B,H]F8 to VCVT2PH2[B,H]F8 gcc/ChangeLog: PR target/118270 * config/i386/avx10_2-512convertintrin.h: Change intrin and builtin name according to new mnemonics. * config/i386/avx10_2convertintrin.h: Ditto. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/sse.md (UNSPEC_VCVT2PH2BF8): Rename from UNSPEC_VCVTNE2PH2BF8. (UNSPEC_VCVT2PH2BF8S): Rename from UNSPEC_VCVTNE2PH2BF8S. (UNSPEC_VCVT2PH2HF8): Rename from UNSPEC_VCVTNE2PH2HF8. (UNSPEC_VCVT2PH2HF8S): Rename from UNSPEC_VCVTNE2PH2HF8S. (UNSPEC_CONVERTFP8_PACK): Rename from UNSPEC_NECONVERTFP8_PACK. Adjust UNSPEC name. (convertfp8_pack): Rename from neconvertfp8_pack. Adjust iterator map. (vcvt<neconvertfp8_pack><mode><mask_name>): Rename to... (vcvt<convertfp8_pack><mode><mask_name>): ...this. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-512-convert-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c: Move to... * gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-convert-1.c: Adjust output and intrin call. * gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c: Move to... * gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c: Move to... * gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c: ...here. Adjust intrin call. * gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c: Move to... * gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c: ...here. Adjust intrin call. 
* gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c: Move to... * gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c: ...here. Adjust intrin call. --- gcc/config/i386/avx10_2-512convertintrin.h | 142 +++++----- gcc/config/i386/avx10_2convertintrin.h | 286 ++++++++++----------- gcc/config/i386/i386-builtin.def | 24 +- gcc/config/i386/sse.md | 30 +-- .../gcc.target/i386/avx10_2-512-convert-1.c | 56 ++-- .../gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c | 80 ++++++ .../gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c | 80 ++++++ .../gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c | 80 ++++++ .../gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c | 80 ++++++ .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c | 80 ------ .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c | 80 ------ .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c | 80 ------ .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c | 80 ------ gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c | 104 ++++---- .../gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c | 16 ++ .../gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c | 16 -- .../gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c | 16 -- 22 files changed, 705 insertions(+), 705 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c delete mode 100644 
gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c (limited to 'gcc') diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h index 23b2636..c753dd7 100644 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ b/gcc/config/i386/avx10_2-512convertintrin.h @@ -265,134 +265,134 @@ _mm512_maskz_cvtbiassph_phf8 (__mmask32 __U, __m512i __A, __m512h __B) extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtne2ph_pbf8 (__m512h __A, __m512h __B) +_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtne2ph_pbf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) +_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); } 
extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtne2ph_pbf8 (__mmask64 __U, __m512h __A, __m512h __B) +_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtnes2ph_pbf8 (__m512h __A, __m512h __B) +_mm512_cvts2ph_bf8 (__m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtnes2ph_pbf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) +_mm512_mask_cvts2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtnes2ph_pbf8 (__mmask64 __U, __m512h __A, __m512h __B) +_mm512_maskz_cvts2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + 
(__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtne2ph_phf8 (__m512h __A, __m512h __B) +_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtne2ph_phf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) +_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtne2ph_phf8 (__mmask64 __U, __m512h __A, __m512h __B) +_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtnes2ph_phf8 (__m512h __A, __m512h __B) +_mm512_cvts2ph_hf8 (__m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask 
((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtnes2ph_phf8 (__m512i __W, __mmask64 __U, +_mm512_mask_cvts2ph_hf8 (__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); } extern __inline__ __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtnes2ph_phf8 (__mmask64 __U, __m512h __A, __m512h __B) +_mm512_maskz_cvts2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) { - return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); } extern __inline__ __m512h diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index 4b8a9e1..45d2bff 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -418,266 +418,266 @@ _mm256_maskz_cvtbiassph_phf8 (__mmask16 __U, __m256i __A, __m256h __B) extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtne2ph_pbf8 (__m128h __A, __m128h __B) +_mm_cvt2ph_bf8 (__m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvt2ph2bf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm_mask_cvtne2ph_pbf8 (__m128i __W, __mmask16 __U, - __m128h __A, __m128h __B) +_mm_mask_cvt2ph_bf8 (__m128i __W, __mmask16 __U, + __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2bf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtne2ph_pbf8 (__mmask16 __U, __m128h __A, __m128h __B) +_mm_maskz_cvt2ph_bf8 (__mmask16 __U, __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2bf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtne2ph_pbf8 (__m256h __A, __m256h __B) +_mm256_cvt2ph_bf8 (__m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvt2ph2bf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtne2ph_pbf8 (__m256i __W, __mmask32 __U, - __m256h __A, __m256h __B) +_mm256_mask_cvt2ph_bf8 (__m256i __W, __mmask32 __U, + __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2bf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) -_mm256_maskz_cvtne2ph_pbf8 (__mmask32 __U, __m256h __A, __m256h __B) +_mm256_maskz_cvt2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2bf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtnes2ph_pbf8 (__m128h __A, __m128h __B) +_mm_cvts2ph_bf8 (__m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvt2ph2bf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtnes2ph_pbf8 (__m128i __W, __mmask16 __U, - __m128h __A, __m128h __B) +_mm_mask_cvts2ph_bf8 (__m128i __W, __mmask16 __U, + __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2bf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtnes2ph_pbf8 (__mmask16 __U, __m128h __A, __m128h __B) +_mm_maskz_cvts2ph_bf8 (__mmask16 __U, __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2bf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2bf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m256i 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtnes2ph_pbf8 (__m256h __A, __m256h __B) +_mm256_cvts2ph_bf8 (__m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvt2ph2bf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtnes2ph_pbf8 (__m256i __W, __mmask32 __U, +_mm256_mask_cvts2ph_bf8 (__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2bf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtnes2ph_pbf8 (__mmask32 __U, __m256h __A, __m256h __B) +_mm256_maskz_cvts2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2bf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2bf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtne2ph_phf8 (__m128h __A, __m128h __B) +_mm_cvt2ph_hf8 (__m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvt2ph2hf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtne2ph_phf8 (__m128i __W, __mmask16 __U, - __m128h __A, __m128h __B) +_mm_mask_cvt2ph_hf8 (__m128i __W, __mmask16 __U, + __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2hf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtne2ph_phf8 (__mmask16 __U, __m128h __A, __m128h __B) +_mm_maskz_cvt2ph_hf8 (__mmask16 __U, __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2hf8128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtne2ph_phf8 (__m256h __A, __m256h __B) +_mm256_cvt2ph_hf8 (__m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvt2ph2hf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtne2ph_phf8 (__m256i __W, __mmask32 __U, - __m256h __A, __m256h __B) +_mm256_mask_cvt2ph_hf8 (__m256i __W, __mmask32 __U, + __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2hf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) __W, + (__mmask32) __U); } extern __inline__ __m256i 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtne2ph_phf8 (__mmask32 __U, __m256h __A, __m256h __B) +_mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2hf8256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtnes2ph_phf8 (__m128h __A, __m128h __B) +_mm_cvts2ph_hf8 (__m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) -1); + return (__m128i) __builtin_ia32_vcvt2ph2hf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) -1); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cvtnes2ph_phf8 (__m128i __W, __mmask16 __U, - __m128h __A, __m128h __B) +_mm_mask_cvts2ph_hf8 (__m128i __W, __mmask16 __U, + __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) __W, - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2hf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) __W, + (__mmask16) __U); } extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_cvtnes2ph_phf8 (__mmask16 __U, __m128h __A, __m128h __B) +_mm_maskz_cvts2ph_hf8 (__mmask16 __U, __m128h __A, __m128h __B) { - return (__m128i) __builtin_ia32_vcvtne2ph2hf8s128_mask ((__v8hf) __A, - (__v8hf) __B, - (__v16qi) - _mm_setzero_si128 (), - (__mmask16) __U); + return (__m128i) __builtin_ia32_vcvt2ph2hf8s128_mask ((__v8hf) __A, + (__v8hf) __B, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); } 
extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_cvtnes2ph_phf8 (__m256h __A, __m256h __B) +_mm256_cvts2ph_hf8 (__m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) -1); + return (__m256i) __builtin_ia32_vcvt2ph2hf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) -1); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_cvtnes2ph_phf8 (__m256i __W, __mmask32 __U, - __m256h __A, __m256h __B) +_mm256_mask_cvts2ph_hf8 (__m256i __W, __mmask32 __U, + __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) __W, - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2hf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) __W, + (__mmask32) __U); } extern __inline__ __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_cvtnes2ph_phf8 (__mmask32 __U, __m256h __A, __m256h __B) +_mm256_maskz_cvts2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i) __builtin_ia32_vcvtne2ph2hf8s256_mask ((__v16hf) __A, - (__v16hf) __B, - (__v32qi) - _mm256_setzero_si256 (), - (__mmask32) __U); + return (__m256i) __builtin_ia32_vcvt2ph2hf8s256_mask ((__v16hf) __A, + (__v16hf) __B, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); } extern __inline__ __m128h diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 7e1dad2..edd5c2b 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3152,18 +3152,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv8hf, "__builtin BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv8hf_mask, "__builtin_ia32_vcvtbiasph2hf8s128_mask", 
IX86_BUILTIN_VCVTBIASPH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V8HF_V16QI_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtbiasph2hf8sv16hf_mask, "__builtin_ia32_vcvtbiasph2hf8s256_mask", IX86_BUILTIN_VCVTBIASPH2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V32QI_V16HF_V16QI_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtbiasph2hf8sv32hf_mask, "__builtin_ia32_vcvtbiasph2hf8s512_mask", IX86_BUILTIN_VCVTBIASPH2HF8S512_MASK, UNKNOWN, (int) V32QI_FTYPE_V64QI_V32HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8v8hf_mask, "__builtin_ia32_vcvtne2ph2bf8128_mask", IX86_BUILTIN_VCVTNE2PH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8v16hf_mask, "__builtin_ia32_vcvtne2ph2bf8256_mask", IX86_BUILTIN_VCVTNE2PH2BF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2bf8v32hf_mask, "__builtin_ia32_vcvtne2ph2bf8512_mask", IX86_BUILTIN_VCVTNE2PH2BF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8sv8hf_mask, "__builtin_ia32_vcvtne2ph2bf8s128_mask", IX86_BUILTIN_VCVTNE2PH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2bf8sv16hf_mask, "__builtin_ia32_vcvtne2ph2bf8s256_mask", IX86_BUILTIN_VCVTNE2PH2BF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2bf8sv32hf_mask, "__builtin_ia32_vcvtne2ph2bf8s512_mask", IX86_BUILTIN_VCVTNE2PH2BF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8v8hf_mask, "__builtin_ia32_vcvtne2ph2hf8128_mask", IX86_BUILTIN_VCVTNE2PH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8v16hf_mask, 
"__builtin_ia32_vcvtne2ph2hf8256_mask", IX86_BUILTIN_VCVTNE2PH2HF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2hf8v32hf_mask, "__builtin_ia32_vcvtne2ph2hf8512_mask", IX86_BUILTIN_VCVTNE2PH2HF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8sv8hf_mask, "__builtin_ia32_vcvtne2ph2hf8s128_mask", IX86_BUILTIN_VCVTNE2PH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtne2ph2hf8sv16hf_mask, "__builtin_ia32_vcvtne2ph2hf8s256_mask", IX86_BUILTIN_VCVTNE2PH2HF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtne2ph2hf8sv32hf_mask, "__builtin_ia32_vcvtne2ph2hf8s512_mask", IX86_BUILTIN_VCVTNE2PH2HF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2bf8v8hf_mask, "__builtin_ia32_vcvt2ph2bf8128_mask", IX86_BUILTIN_VCVT2PH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2bf8v16hf_mask, "__builtin_ia32_vcvt2ph2bf8256_mask", IX86_BUILTIN_VCVT2PH2BF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2bf8v32hf_mask, "__builtin_ia32_vcvt2ph2bf8512_mask", IX86_BUILTIN_VCVT2PH2BF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2bf8sv8hf_mask, "__builtin_ia32_vcvt2ph2bf8s128_mask", IX86_BUILTIN_VCVT2PH2BF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2bf8sv16hf_mask, "__builtin_ia32_vcvt2ph2bf8s256_mask", IX86_BUILTIN_VCVT2PH2BF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2bf8sv32hf_mask, 
"__builtin_ia32_vcvt2ph2bf8s512_mask", IX86_BUILTIN_VCVT2PH2BF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8v8hf_mask, "__builtin_ia32_vcvt2ph2hf8128_mask", IX86_BUILTIN_VCVT2PH2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8v16hf_mask, "__builtin_ia32_vcvt2ph2hf8256_mask", IX86_BUILTIN_VCVT2PH2HF8256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2hf8v32hf_mask, "__builtin_ia32_vcvt2ph2hf8512_mask", IX86_BUILTIN_VCVT2PH2HF8512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8sv8hf_mask, "__builtin_ia32_vcvt2ph2hf8s128_mask", IX86_BUILTIN_VCVT2PH2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V8HF_V16QI_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvt2ph2hf8sv16hf_mask, "__builtin_ia32_vcvt2ph2hf8s256_mask", IX86_BUILTIN_VCVT2PH2HF8S256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HF_V16HF_V32QI_USI) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvt2ph2hf8sv32hf_mask, "__builtin_ia32_vcvt2ph2hf8s512_mask", IX86_BUILTIN_VCVT2PH2HF8S512_MASK, UNKNOWN, (int) V64QI_FTYPE_V32HF_V32HF_V64QI_UDI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v8hf_mask, "__builtin_ia32_vcvtneph2bf8128_mask", IX86_BUILTIN_VCVTNEPH2BF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HF_V16QI_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_vcvtneph2bf8v16hf_mask, "__builtin_ia32_vcvtneph2bf8256_mask", IX86_BUILTIN_VCVTNEPH2BF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V16HF_V16QI_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vcvtneph2bf8v32hf_mask, "__builtin_ia32_vcvtneph2bf8512_mask", IX86_BUILTIN_VCVTNEPH2BF8512_MASK, UNKNOWN, (int) V32QI_FTYPE_V32HF_V32QI_USI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c239f41..582942c 100644 --- a/gcc/config/i386/sse.md +++ 
b/gcc/config/i386/sse.md @@ -220,10 +220,10 @@ UNSPEC_VCVTBIASPH2BF8S UNSPEC_VCVTBIASPH2HF8 UNSPEC_VCVTBIASPH2HF8S - UNSPEC_VCVTNE2PH2BF8 - UNSPEC_VCVTNE2PH2BF8S - UNSPEC_VCVTNE2PH2HF8 - UNSPEC_VCVTNE2PH2HF8S + UNSPEC_VCVT2PH2BF8 + UNSPEC_VCVT2PH2BF8S + UNSPEC_VCVT2PH2HF8 + UNSPEC_VCVT2PH2HF8S UNSPEC_VCVTNEPH2BF8 UNSPEC_VCVTNEPH2BF8S UNSPEC_VCVTNEPH2HF8 @@ -31722,24 +31722,24 @@ (define_mode_attr ssebvecmode [(V8HF "V16QI") (V16HF "V32QI") (V32HF "V64QI")]) -(define_int_iterator UNSPEC_NECONVERTFP8_PACK - [UNSPEC_VCVTNE2PH2BF8 UNSPEC_VCVTNE2PH2BF8S - UNSPEC_VCVTNE2PH2HF8 UNSPEC_VCVTNE2PH2HF8S]) +(define_int_iterator UNSPEC_CONVERTFP8_PACK + [UNSPEC_VCVT2PH2BF8 UNSPEC_VCVT2PH2BF8S + UNSPEC_VCVT2PH2HF8 UNSPEC_VCVT2PH2HF8S]) -(define_int_attr neconvertfp8_pack - [(UNSPEC_VCVTNE2PH2BF8 "ne2ph2bf8") - (UNSPEC_VCVTNE2PH2BF8S "ne2ph2bf8s") - (UNSPEC_VCVTNE2PH2HF8 "ne2ph2hf8") - (UNSPEC_VCVTNE2PH2HF8S "ne2ph2hf8s")]) +(define_int_attr convertfp8_pack + [(UNSPEC_VCVT2PH2BF8 "2ph2bf8") + (UNSPEC_VCVT2PH2BF8S "2ph2bf8s") + (UNSPEC_VCVT2PH2HF8 "2ph2hf8") + (UNSPEC_VCVT2PH2HF8S "2ph2hf8s")]) -(define_insn "vcvt<neconvertfp8_pack><mode><mask_name>" +(define_insn "vcvt<convertfp8_pack><mode><mask_name>" [(set (match_operand:<ssebvecmode> 0 "register_operand" "=v") (unspec:<ssebvecmode> [(match_operand:VHF_AVX10_2 1 "register_operand" "v") (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")] - UNSPEC_NECONVERTFP8_PACK))] + UNSPEC_CONVERTFP8_PACK))] "TARGET_AVX10_2_256" - "vcvt<neconvertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + "vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) (define_mode_attr ssebvecmode_2 diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c index 9a25e22..955c862 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c @@ -18,18 +18,18 @@ /* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { 
scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ 
-/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { 
dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%ymm\[0-9\]+,\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -108,35 +108,35 @@ avx10_2_512_vcvtbiasph2hf8s_test (void) } void extern -avx10_2_512_vcvtne2ph2bf8_test (void) +avx10_2_512_vcvt2ph2bf8_test (void) { - x512i = _mm512_cvtne2ph_pbf8 (x512h, x512h); - x512i = _mm512_mask_cvtne2ph_pbf8 (x512i, m64, x512h, x512h); - x512i = _mm512_maskz_cvtne2ph_pbf8 (m64, x512h, x512h); + x512i = _mm512_cvt2ph_bf8 (x512h, x512h); + x512i = _mm512_mask_cvt2ph_bf8 (x512i, m64, x512h, x512h); + x512i = _mm512_maskz_cvt2ph_bf8 (m64, x512h, x512h); } void extern -avx10_2_512_vcvtne2ph2bf8s_test (void) +avx10_2_512_vcvt2ph2bf8s_test (void) { - x512i = _mm512_cvtnes2ph_pbf8 (x512h, x512h); - x512i = _mm512_mask_cvtnes2ph_pbf8 (x512i, m64, x512h, x512h); - x512i = _mm512_maskz_cvtnes2ph_pbf8 (m64, x512h, x512h); + x512i = _mm512_cvts2ph_bf8 (x512h, x512h); + x512i = _mm512_mask_cvts2ph_bf8 (x512i, m64, x512h, x512h); + x512i = _mm512_maskz_cvts2ph_bf8 (m64, x512h, x512h); } void extern -avx10_2_512_vcvtne2ph2hf8_test (void) +avx10_2_512_vcvt2ph2hf8_test (void) { - x512i = _mm512_cvtne2ph_phf8 (x512h, x512h); - x512i = _mm512_mask_cvtne2ph_phf8 (x512i, m64, x512h, x512h); - x512i = _mm512_maskz_cvtne2ph_phf8 (m64, x512h, x512h); + x512i = _mm512_cvt2ph_hf8 (x512h, x512h); + x512i = _mm512_mask_cvt2ph_hf8 (x512i, m64, x512h, 
x512h); + x512i = _mm512_maskz_cvt2ph_hf8 (m64, x512h, x512h); } void extern -avx10_2_512_vcvtne2ph2hf8s_test (void) +avx10_2_512_vcvt2ph2hf8s_test (void) { - x512i = _mm512_cvtnes2ph_phf8 (x512h, x512h); - x512i = _mm512_mask_cvtnes2ph_phf8 (x512i, m64, x512h, x512h); - x512i = _mm512_maskz_cvtnes2ph_phf8 (m64, x512h, x512h); + x512i = _mm512_cvts2ph_hf8 (x512h, x512h); + x512i = _mm512_mask_cvts2ph_hf8 (x512i, m64, x512h, x512h); + x512i = _mm512_maskz_cvts2ph_hf8 (m64, x512h, x512h); } void extern diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c new file mode 100644 index 0000000..8662d26 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8-2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE_SRC (AVX512F_LEN / 16) +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) +{ + _Float16 temp; + Float16Union ut = {.f16 = temp}; + int i, hf8_bf8, saturate; + + hf8_bf8 = 1; + saturate = 0; + + for (i = 0; i < SIZE; i++) + { + r[i] = 0; + if (i < SIZE_SRC) + { + Float16Union usrc2 = {.f16 = s2[i]}; + ut.u16 = usrc2.u16; + } + else + { + Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; + ut.u16 = usrc1.u16; + } + r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE]; + + sign = 1; + for (i = 0; i < SIZE_SRC; i++) + { + src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); + src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + + 
for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + CALC(res_ref, src1.a, src2.a); + + res1.x = INTRINSIC (_cvt2ph_bf8) (src1.x, src2.x); + if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvt2ph_bf8) (res2.x, mask, src1.x, src2.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvt2ph_bf8) (mask, src1.x, src2.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c new file mode 100644 index 0000000..4933a8b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2bf8s-2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE_SRC (AVX512F_LEN / 16) +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) +{ + _Float16 temp; + Float16Union ut = {.f16 = temp}; + int i, hf8_bf8, saturate; + + hf8_bf8 = 1; + saturate = 1; + + for (i = 0; i < SIZE; i++) + { + r[i] = 0; + if (i < SIZE_SRC) + { + Float16Union usrc2 = {.f16 = s2[i]}; + ut.u16 = usrc2.u16; + } + else + { + Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; + ut.u16 = usrc1.u16; + } + r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE]; + + sign = 1; + for (i = 0; i < SIZE_SRC; i++) + { + src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); + src2.a[i] = 
(_Float16)(-sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + CALC(res_ref, src1.a, src2.a); + + res1.x = INTRINSIC (_cvts2ph_bf8) (src1.x, src2.x); + if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvts2ph_bf8) (res2.x, mask, src1.x, src2.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvts2ph_bf8) (mask, src1.x, src2.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c new file mode 100644 index 0000000..633d15a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8-2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE_SRC (AVX512F_LEN / 16) +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) +{ + _Float16 temp; + Float16Union ut = {.f16 = temp}; + int i, hf8_bf8, saturate; + + hf8_bf8 = 0; + saturate = 0; + + for (i = 0; i < SIZE; i++) + { + r[i] = 0; + if (i < SIZE_SRC) + { + Float16Union usrc2 = {.f16 = s2[i]}; + ut.u16 = usrc2.u16; + } + else + { + Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; + ut.u16 = usrc1.u16; + } + r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned char res_ref[SIZE]; + + sign = 1; + for (i = 0; i < SIZE_SRC; i++) + { + 
src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); + src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); + sign = -sign; + } + + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + CALC(res_ref, src1.a, src2.a); + + res1.x = INTRINSIC (_cvt2ph_hf8) (src1.x, src2.x); + if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvt2ph_hf8) (res2.x, mask, src1.x, src2.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvt2ph_hf8) (mask, src1.x, src2.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c new file mode 100644 index 0000000..e53e924 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ph2hf8s-2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#ifndef AVX10_2 +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#endif + +#include "avx10-helper.h" +#include "fp8-helper.h" + +#define SIZE_SRC (AVX512F_LEN / 16) +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" + +void +CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) +{ + _Float16 temp; + Float16Union ut = {.f16 = temp}; + int i, hf8_bf8, saturate; + + hf8_bf8 = 0; + saturate = 1; + + for (i = 0; i < SIZE; i++) + { + r[i] = 0; + if (i < SIZE_SRC) + { + Float16Union usrc2 = {.f16 = s2[i]}; + ut.u16 = usrc2.u16; + } + else + { + Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; + ut.u16 = usrc1.u16; + } + r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); + } +} + +void +TEST (void) +{ + int i,sign; + UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; + UNION_TYPE (AVX512F_LEN, h) src1, src2; + MASK_TYPE mask = MASK_VALUE; + unsigned char 
res_ref[SIZE]; + + sign = 1; + for (i = 0; i < SIZE_SRC; i++) + { + src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); + src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); + sign *= -1; + } + + for (i = 0; i < SIZE; i++) + res2.a[i] = DEFAULT_VALUE; + + CALC(res_ref, src1.a, src2.a); + + res1.x = INTRINSIC (_cvts2ph_hf8) (src1.x, src2.x); + if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) + abort (); + + res2.x = INTRINSIC (_mask_cvts2ph_hf8) (res2.x, mask, src1.x, src2.x); + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) + abort (); + + res3.x = INTRINSIC (_maskz_cvts2ph_hf8) (mask, src1.x, src2.x); + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c deleted file mode 100644 index 7e7865d..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c +++ /dev/null @@ -1,80 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE_SRC (AVX512F_LEN / 16) -#define SIZE (AVX512F_LEN / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) -{ - _Float16 temp; - Float16Union ut = {.f16 = temp}; - int i, hf8_bf8, saturate; - - hf8_bf8 = 1; - saturate = 0; - - for (i = 0; i < SIZE; i++) - { - r[i] = 0; - if (i < SIZE_SRC) - { - Float16Union usrc2 = {.f16 = s2[i]}; - ut.u16 = usrc2.u16; - } - else - { - Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; - ut.u16 = usrc1.u16; - } - r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; - UNION_TYPE 
(AVX512F_LEN, h) src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE_SRC; i++) - { - src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); - src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; - - CALC(res_ref, src1.a, src2.a); - - res1.x = INTRINSIC (_cvtne2ph_pbf8) (src1.x, src2.x); - if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtne2ph_pbf8) (res2.x, mask, src1.x, src2.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtne2ph_pbf8) (mask, src1.x, src2.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c deleted file mode 100644 index 0ca0c42..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c +++ /dev/null @@ -1,80 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE_SRC (AVX512F_LEN / 16) -#define SIZE (AVX512F_LEN / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) -{ - _Float16 temp; - Float16Union ut = {.f16 = temp}; - int i, hf8_bf8, saturate; - - hf8_bf8 = 1; - saturate = 1; - - for (i = 0; i < SIZE; i++) - { - r[i] = 0; - if (i < SIZE_SRC) - { - Float16Union usrc2 = {.f16 = s2[i]}; - ut.u16 = usrc2.u16; - } - else - { - Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; - ut.u16 = usrc1.u16; - } - r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); - } -} - -void -TEST 
(void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE_SRC; i++) - { - src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); - src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; - - CALC(res_ref, src1.a, src2.a); - - res1.x = INTRINSIC (_cvtnes2ph_pbf8) (src1.x, src2.x); - if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtnes2ph_pbf8) (res2.x, mask, src1.x, src2.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtnes2ph_pbf8) (mask, src1.x, src2.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c deleted file mode 100644 index 97afd39..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c +++ /dev/null @@ -1,80 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE_SRC (AVX512F_LEN / 16) -#define SIZE (AVX512F_LEN / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) -{ - _Float16 temp; - Float16Union ut = {.f16 = temp}; - int i, hf8_bf8, saturate; - - hf8_bf8 = 0; - saturate = 0; - - for (i = 0; i < SIZE; i++) - { - r[i] = 0; - if (i < SIZE_SRC) - { - Float16Union usrc2 = {.f16 = s2[i]}; - ut.u16 = usrc2.u16; - } - else - { - Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; - ut.u16 = usrc1.u16; 
- } - r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE_SRC; i++) - { - src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); - src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); - sign = -sign; - } - - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; - - CALC(res_ref, src1.a, src2.a); - - res1.x = INTRINSIC (_cvtne2ph_phf8) (src1.x, src2.x); - if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtne2ph_phf8) (res2.x, mask, src1.x, src2.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtne2ph_phf8) (mask, src1.x, src2.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c deleted file mode 100644 index 2d99d11..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c +++ /dev/null @@ -1,80 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2-512" } */ -/* { dg-require-effective-target avx10_2_512 } */ - -#ifndef AVX10_2 -#define AVX10_2 -#define AVX10_2_512 -#define AVX10_512BIT -#endif - -#include "avx10-helper.h" -#include "fp8-helper.h" - -#define SIZE_SRC (AVX512F_LEN / 16) -#define SIZE (AVX512F_LEN / 8) -#include "avx512f-mask-type.h" - -void -CALC (unsigned char *r, _Float16 *s1, _Float16 *s2) -{ - _Float16 temp; - Float16Union ut = {.f16 = temp}; - int i, hf8_bf8, saturate; - - hf8_bf8 = 0; - saturate = 1; - - for (i = 0; i < SIZE; i++) - { - r[i] = 0; - if (i < SIZE_SRC) - { - Float16Union usrc2 = {.f16 = s2[i]}; - ut.u16 = usrc2.u16; 
- } - else - { - Float16Union usrc1 = {.f16 = s1[i-SIZE_SRC]}; - ut.u16 = usrc1.u16; - } - r[i] = convert_fp16_to_fp8(ut.f16, 0, hf8_bf8, saturate); - } -} - -void -TEST (void) -{ - int i,sign; - UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3; - UNION_TYPE (AVX512F_LEN, h) src1, src2; - MASK_TYPE mask = MASK_VALUE; - unsigned char res_ref[SIZE]; - - sign = 1; - for (i = 0; i < SIZE_SRC; i++) - { - src1.a[i] = (_Float16)(sign * (1.5 * (1 << (i % 3)))); - src2.a[i] = (_Float16)(-sign * (2.5 * (1 << (i % 3)))); - sign *= -1; - } - - for (i = 0; i < SIZE; i++) - res2.a[i] = DEFAULT_VALUE; - - CALC(res_ref, src1.a, src2.a); - - res1.x = INTRINSIC (_cvtnes2ph_phf8) (src1.x, src2.x); - if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) - abort (); - - res2.x = INTRINSIC (_mask_cvtnes2ph_phf8) (res2.x, mask, src1.x, src2.x); - MASK_MERGE (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) - abort (); - - res3.x = INTRINSIC (_maskz_cvtnes2ph_phf8) (mask, src1.x, src2.x); - MASK_ZERO (i_b) (res_ref, mask, SIZE); - if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) - abort (); -} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c index 190c972..49b3438 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c @@ -33,30 +33,30 @@ /* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvtbiasph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ 
\\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times 
"vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vcvtne2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2bf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { 
scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vcvt2ph2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+,\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { 
scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vcvthf82ph\[ \\t\]*%xmm\[0-9\]+,\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ @@ -178,47 +178,47 @@ avx10_2_vcvtbiasph2hf8s_test (void) } void extern -avx10_2_vcvtne2ph2bf8_test (void) +avx10_2_vcvt2ph2bf8_test (void) { - x128i = _mm_cvtne2ph_pbf8 (x128h, x128h); - x128i = _mm_mask_cvtne2ph_pbf8 (x128i, m16, x128h, x128h); - x128i = _mm_maskz_cvtne2ph_pbf8 (m16, x128h, x128h); - x256i = _mm256_cvtne2ph_pbf8 (x256h, x256h); - x256i = _mm256_mask_cvtne2ph_pbf8 (x256i, m32, x256h, x256h); - x256i = _mm256_maskz_cvtne2ph_pbf8 (m32, x256h, x256h); + x128i = _mm_cvt2ph_bf8 (x128h, x128h); + x128i = _mm_mask_cvt2ph_bf8 (x128i, m16, x128h, x128h); + x128i = _mm_maskz_cvt2ph_bf8 (m16, x128h, x128h); + x256i = _mm256_cvt2ph_bf8 (x256h, x256h); + x256i = _mm256_mask_cvt2ph_bf8 (x256i, m32, x256h, x256h); + x256i = _mm256_maskz_cvt2ph_bf8 (m32, x256h, x256h); } void extern -avx10_2_vcvtne2ph2bf8s_test (void) +avx10_2_vcvt2ph2bf8s_test (void) { - x128i = _mm_cvtnes2ph_pbf8 (x128h, x128h); - x128i = _mm_mask_cvtnes2ph_pbf8 (x128i, m16, x128h, x128h); - x128i = _mm_maskz_cvtnes2ph_pbf8 (m16, x128h, x128h); - x256i = _mm256_cvtnes2ph_pbf8 (x256h, x256h); - x256i = _mm256_mask_cvtnes2ph_pbf8 (x256i, m32, x256h, x256h); - x256i = _mm256_maskz_cvtnes2ph_pbf8 (m32, x256h, x256h); + x128i = _mm_cvts2ph_bf8 (x128h, x128h); + x128i = _mm_mask_cvts2ph_bf8 (x128i, m16, x128h, x128h); + x128i = _mm_maskz_cvts2ph_bf8 (m16, x128h, x128h); + x256i = _mm256_cvts2ph_bf8 (x256h, x256h); + x256i = _mm256_mask_cvts2ph_bf8 (x256i, m32, x256h, x256h); + x256i = _mm256_maskz_cvts2ph_bf8 (m32, x256h, x256h); } void extern -avx10_2_vcvtne2ph2hf8_test (void) +avx10_2_vcvt2ph2hf8_test (void) { - x128i = 
_mm_cvtne2ph_phf8 (x128h, x128h); - x128i = _mm_mask_cvtne2ph_phf8 (x128i, m16, x128h, x128h); - x128i = _mm_maskz_cvtne2ph_phf8 (m16, x128h, x128h); - x256i = _mm256_cvtne2ph_phf8 (x256h, x256h); - x256i = _mm256_mask_cvtne2ph_phf8 (x256i, m32, x256h, x256h); - x256i = _mm256_maskz_cvtne2ph_phf8 (m32, x256h, x256h); + x128i = _mm_cvt2ph_hf8 (x128h, x128h); + x128i = _mm_mask_cvt2ph_hf8 (x128i, m16, x128h, x128h); + x128i = _mm_maskz_cvt2ph_hf8 (m16, x128h, x128h); + x256i = _mm256_cvt2ph_hf8 (x256h, x256h); + x256i = _mm256_mask_cvt2ph_hf8 (x256i, m32, x256h, x256h); + x256i = _mm256_maskz_cvt2ph_hf8 (m32, x256h, x256h); } void extern -avx10_2_vcvtne2ph2hf8s_test (void) +avx10_2_vcvt2ph2hf8s_test (void) { - x128i = _mm_cvtnes2ph_phf8 (x128h, x128h); - x128i = _mm_mask_cvtnes2ph_phf8 (x128i, m16, x128h, x128h); - x128i = _mm_maskz_cvtnes2ph_phf8 (m16, x128h, x128h); - x256i = _mm256_cvtnes2ph_phf8 (x256h, x256h); - x256i = _mm256_mask_cvtnes2ph_phf8 (x256i, m32, x256h, x256h); - x256i = _mm256_maskz_cvtnes2ph_phf8 (m32, x256h, x256h); + x128i = _mm_cvts2ph_hf8 (x128h, x128h); + x128i = _mm_mask_cvts2ph_hf8 (x128i, m16, x128h, x128h); + x128i = _mm_maskz_cvts2ph_hf8 (m16, x128h, x128h); + x256i = _mm256_cvts2ph_hf8 (x256h, x256h); + x256i = _mm256_mask_cvts2ph_hf8 (x256i, m32, x256h, x256h); + x256i = _mm256_maskz_cvts2ph_hf8 (m32, x256h, x256h); } void extern diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c new file mode 100644 index 0000000..9dd940c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2bf8-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 
+#include "avx10_2-512-vcvt2ph2bf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c new file mode 100644 index 0000000..2a9caca --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2bf8s-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2bf8s-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2bf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c new file mode 100644 index 0000000..80dc248 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2hf8-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2hf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c new file mode 100644 index 0000000..30f6a60 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvt2ph2hf8s-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ +/* { dg-require-effective-target avx10_2 } */ + +#define AVX10_2 +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx10_2-512-vcvt2ph2hf8s-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include 
"avx10_2-512-vcvt2ph2hf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c deleted file mode 100644 index bf1a6c8..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2bf8-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2bf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c deleted file mode 100644 index e02b7ec..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2bf8s-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2bf8s-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2bf8s-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c deleted file mode 100644 index 6a2db56..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2hf8-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define 
AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2hf8-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c deleted file mode 100644 index 2041527..0000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcvtne2ph2hf8s-2.c +++ /dev/null @@ -1,16 +0,0 @@ -/* { dg-do run } */ -/* { dg-options "-O2 -march=x86-64-v3 -mavx10.2" } */ -/* { dg-require-effective-target avx10_2 } */ - -#define AVX10_2 -#define AVX512VL -#define AVX512F_LEN 256 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2hf8s-2.c" - -#undef AVX512F_LEN -#undef AVX512F_LEN_HALF - -#define AVX512F_LEN 128 -#define AVX512F_LEN_HALF 128 -#include "avx10_2-512-vcvtne2ph2hf8s-2.c" -- cgit v1.1