author     Hu, Lin1 <lin1.hu@intel.com>             2024-08-26 10:53:49 +0800
committer  Haochen Jiang <haochen.jiang@intel.com>  2024-08-26 11:48:36 +0800
commit     3a97ce179f75ec32b7f591422ba254c814567e4d (patch)
tree       54e64ca468401bc0d306ba66f7609c72aedf1c7a /gcc
parent     e2c80d237223f8524c2bd930b681aa891a13db99 (diff)
[PATCH 2/2] AVX10.2: Support saturating convert instructions
gcc/ChangeLog:

	* config/i386/avx10_2-512satcvtintrin.h: Add new intrins.
	* config/i386/avx10_2satcvtintrin.h: Ditto.
	* config/i386/i386-builtin.def (BDESC): Add new builtins.
	* config/i386/sse.md (VF1_VF2_AVX10_2): New iterator.
	(VF2_AVX10_2): Ditto.
	(VI8_AVX10_2): Ditto.
	(sat_cvt_sign_prefix): Add new UNSPECs.
	(UNSPEC_SAT_CVT_DS_SIGN_ITER): New iterator.
	(pd2dqssuff): New mode attribute.
	(avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>): New.
	(avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>): Ditto.
	(avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>): Ditto.
	(avx10_2_vcvttsd2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>): Ditto.
	(avx10_2_vcvttss2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add macros.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/avx10_2-satcvt-1.c: Add test.
	* gcc.target/i386/avx10_2-512-satcvt-1.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c: New test.
	* gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2dqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2qqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2udqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttps2uqqs-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttsd2sis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttsd2usis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttss2sis-2.c: Ditto.
	* gcc.target/i386/avx10_2-vcvttss2usis-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/avx10_2-512satcvtintrin.h                  | 456
-rw-r--r--  gcc/config/i386/avx10_2satcvtintrin.h                      | 977
-rw-r--r--  gcc/config/i386/i386-builtin.def                           |  32
-rw-r--r--  gcc/config/i386/sse.md                                     |  83
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx-1.c                      |  26
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c       |  59
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c  |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c  |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c  |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c  |  73
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c |  72
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c           | 138
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c      |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c      |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c     |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c     |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c      |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c      |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c     |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c     |  16
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c      |  47
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c     |  47
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c      |  47
-rw-r--r--  gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c     |  46
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse-13.c                     |  26
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse-14.c                     |  58
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse-22.c                     |  58
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse-23.c                     |  26
31 files changed, 2830 insertions(+), 1 deletion(-)
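
Usage note (an illustration, not part of the patch): the new intrinsics follow the established AVX-512 conventions for destination merging, masking, and the SAE immediate. A minimal sketch, assuming a compiler carrying this patch built with an AVX10.2/512-enabling option; the wrapper name is hypothetical:

#include <immintrin.h>

/* Truncating f64 -> i32 conversion that saturates out-of-range inputs
   instead of returning the integer-indefinite value.  _MM_FROUND_NO_EXC
   (8) fills the SAE immediate, matching the constant the testsuite
   macros below substitute.  */
__m256i
cvtts_pd_to_epi32 (__m512d v)
{
  return _mm512_cvtts_roundpd_epi32 (v, _MM_FROUND_NO_EXC);
}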
diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h
index 4286458..d625a64 100644
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ b/gcc/config/i386/avx10_2-512satcvtintrin.h
@@ -438,6 +438,286 @@ _mm512_maskz_ipcvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
(__mmask16) __U,
__R);
}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
#else
#define _mm512_ipcvt_roundph_epi16(A, R) \
((__m512i) \
@@ -614,6 +894,182 @@ _mm512_maskz_ipcvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
(_mm512_setzero_si512 ()), \
(__mmask16) (U), \
(R)))
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
#endif
#ifdef __DISABLE_AVX10_2_512__
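
Continuing the sketch above (wrapper names hypothetical, signatures as declared in this header): the mask and maskz variants behave like their AVX-512 counterparts.

/* Merge masking: result lanes whose mask bit is clear keep the
   corresponding element of OLD.  */
__m512i
merge_cvt (__m512i old, __mmask8 m, __m512d v)
{
  return _mm512_mask_cvtts_roundpd_epi64 (old, m, v, _MM_FROUND_NO_EXC);
}

/* Zero masking: lanes whose mask bit is clear are zeroed.  */
__m512i
zero_cvt (__mmask8 m, __m512d v)
{
  return _mm512_maskz_cvtts_roundpd_epi64 (m, v, _MM_FROUND_NO_EXC);
}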
diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h
index 4fcf789..d0e3e37 100644
--- a/gcc/config/i386/avx10_2satcvtintrin.h
+++ b/gcc/config/i386/avx10_2satcvtintrin.h
@@ -510,6 +510,238 @@ _mm_maskz_ipcvttps_epu32 (__mmask8 __U, __m128 __A)
(__mmask8) __U);
}
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epi32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttspd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttspd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttspd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqqs128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epi32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epi32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epu32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epu32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udqs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttsps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttsps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqqs128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
@@ -790,6 +1022,318 @@ _mm256_maskz_ipcvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
(__mmask8) __U,
__R);
}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epi32 (__m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epi64 (__m256d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epu32 (__m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundpd_epu64 (__m256d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epi32 (__m256 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epi64 (__m128 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epu32 (__m256 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtts_roundps_epu64 (__m128 __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtts_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epi32 (__m128d __A, const int __R)
+{
+ return (int) __builtin_ia32_cvttsd2sis32_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epu32 (__m128d __A, const int __R)
+{
+ return (unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epi32 (__m128 __A, const int __R)
+{
+ return (int) __builtin_ia32_cvttss2sis32_round ((__v4sf) __A,
+ __R);
+}
+
+extern __inline unsigned int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epu32 (__m128 __A, const int __R)
+{
+ return (unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) __A,
+ __R);
+}
#else
#define _mm256_ipcvt_roundph_epi16(A, R) \
@@ -1012,7 +1556,440 @@ _mm256_maskz_ipcvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
(_mm256_setzero_si256 ()), \
(__mmask8) (U), \
(R)))
+
+#define _mm256_cvtts_roundpd_epi32(A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_undefined_si128 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2dqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_setzero_si128 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epi64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2qqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epu32(A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) \
+ (_mm_undefined_si128 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m128i) __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m128i) \
+ __builtin_ia32_cvttpd2udqs256_mask_round ((__v4df) (A), \
+ (__v4si) (_mm_setzero_si128 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundpd_epu64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2uqqs256_mask_round ((__v4df) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2dqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epi64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2qqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2udqs256_mask_round ((__v8sf) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_cvtts_roundps_epu64(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm256_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm256_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttps2uqqs256_mask_round ((__v4sf) (A), \
+ (__v4di) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epi32(A, R) \
+ ((int) __builtin_ia32_cvttsd2sis32_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epu32(A, R) \
+ ((unsigned int) __builtin_ia32_cvttsd2usis32_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epi32(A, R) \
+ ((int) __builtin_ia32_cvttss2sis32_round ((__v4sf) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epu32(A, R) \
+ ((unsigned int) __builtin_ia32_cvttss2usis32_round ((__v4sf) (A), \
+ (R)))
+#endif
+
+#ifdef __x86_64__
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epi64 (__m128d __A, const int __R)
+{
+ return (long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundsd_epu64 (__m128d __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) __A,
+ __R);
+}
+
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epi64 (__m128 __A, const int __R)
+{
+ return (long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) __A,
+ __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtts_roundss_epu64 (__m128 __A, const int __R)
+{
+ return (unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) __A,
+ __R);
+}
+#else
+
+#define _mm_cvtts_roundsd_epi64(A, R) \
+ ((long long) __builtin_ia32_cvttsd2sis64_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundsd_epu64(A, R) \
+ ((unsigned long long) __builtin_ia32_cvttsd2usis64_round ((__v2df) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epi64(A, R) \
+ ((long long) __builtin_ia32_cvttss2sis64_round ((__v4sf) (A), \
+ (R)))
+
+#define _mm_cvtts_roundss_epu64(A, R) \
+ ((unsigned long long) __builtin_ia32_cvttss2usis64_round ((__v4sf) (A), \
+ (R)))
#endif
+#endif /* __x86_64__ */
#ifdef __DISABLE_AVX10_2_256__
#undef __DISABLE_AVX10_2_256__
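
A sketch of the scalar forms added above (wrapper names hypothetical); note that the 64-bit-result variants are only declared under __x86_64__, as guarded in the header:

/* Saturating, truncating conversion of the low double element.  */
int
cvtts_sd_epi32 (__m128d v)
{
  return _mm_cvtts_roundsd_epi32 (v, _MM_FROUND_NO_EXC);
}

#ifdef __x86_64__
/* 64-bit result from the low float element; 64-bit targets only.  */
long long
cvtts_ss_epi64 (__m128 v)
{
  return _mm_cvtts_roundss_epi64 (v, _MM_FROUND_NO_EXC);
}
#endif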
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index b85eba5b..7278d02 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3290,6 +3290,14 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2ibsv8hf_mask, "_
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttph2iubsv8hf_mask, "__builtin_ia32_cvttph2iubs128_mask", IX86_BUILTIN_CVTTPH2IUBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HF_V8HI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv4sf_mask, "__builtin_ia32_cvttps2ibs128_mask", IX86_BUILTIN_CVTTPS2IBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv4sf_mask, "__builtin_ia32_cvttps2iubs128_mask", IX86_BUILTIN_CVTTPS2IUBS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv2df_mask, "__builtin_ia32_cvttpd2dqs128_mask", IX86_BUILTIN_VCVTTPD2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv2df_mask, "__builtin_ia32_cvttpd2qqs128_mask", IX86_BUILTIN_VCVTTPD2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv2df_mask, "__builtin_ia32_cvttpd2udqs128_mask", IX86_BUILTIN_VCVTTPD2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv2df_mask, "__builtin_ia32_cvttpd2uqqs128_mask", IX86_BUILTIN_VCVTTPD2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv4sf_mask, "__builtin_ia32_cvttps2dqs128_mask", IX86_BUILTIN_VCVTTPS2DQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv2di_mask, "__builtin_ia32_cvttps2qqs128_mask", IX86_BUILTIN_VCVTTPS2QQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv4sf_mask, "__builtin_ia32_cvttps2udqs128_mask", IX86_BUILTIN_VCVTTPS2UDQS128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv2di_mask, "__builtin_ia32_cvttps2uqqs128_mask", IX86_BUILTIN_VCVTTPS2UQQS128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -3766,6 +3774,30 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2ibsv8sf_mask_rou
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2ibsv16sf_mask_round, "__builtin_ia32_cvttps2ibs512_mask_round", IX86_BUILTIN_CVTTPS2IBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvttps2iubsv8sf_mask_round, "__builtin_ia32_cvttps2iubs256_mask_round", IX86_BUILTIN_CVTTPS2IUBS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvttps2iubsv16sf_mask_round, "__builtin_ia32_cvttps2iubs512_mask_round", IX86_BUILTIN_CVTTPS2IUBS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2dqsv4df_mask_round, "__builtin_ia32_cvttpd2dqs256_mask_round", IX86_BUILTIN_VCVTTPD2DQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2dqsv8df_mask_round, "__builtin_ia32_cvttpd2dqs512_mask_round", IX86_BUILTIN_VCVTTPD2DQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2qqsv4df_mask_round, "__builtin_ia32_cvttpd2qqs256_mask_round", IX86_BUILTIN_VCVTTPD2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2qqsv8df_mask_round, "__builtin_ia32_cvttpd2qqs512_mask_round", IX86_BUILTIN_VCVTTPD2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2udqsv4df_mask_round, "__builtin_ia32_cvttpd2udqs256_mask_round", IX86_BUILTIN_VCVTTPD2UDQS256_MASK_ROUND, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2udqsv8df_mask_round, "__builtin_ia32_cvttpd2udqs512_mask_round", IX86_BUILTIN_VCVTTPD2UDQS512_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttpd2uqqsv4df_mask_round, "__builtin_ia32_cvttpd2uqqs256_mask_round", IX86_BUILTIN_VCVTTPD2UQQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttpd2uqqsv8df_mask_round, "__builtin_ia32_cvttpd2uqqs512_mask_round", IX86_BUILTIN_VCVTTPD2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2dqsv8sf_mask_round, "__builtin_ia32_cvttps2dqs256_mask_round", IX86_BUILTIN_VCVTTPS2DQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2dqsv16sf_mask_round, "__builtin_ia32_cvttps2dqs512_mask_round", IX86_BUILTIN_VCVTTPS2DQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2qqsv4di_mask_round, "__builtin_ia32_cvttps2qqs256_mask_round", IX86_BUILTIN_VCVTTPS2QQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2qqsv8di_mask_round, "__builtin_ia32_cvttps2qqs512_mask_round", IX86_BUILTIN_VCVTTPS2QQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2udqsv8sf_mask_round, "__builtin_ia32_cvttps2udqs256_mask_round", IX86_BUILTIN_VCVTTPS2UDQS256_MASK_ROUND, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2udqsv16sf_mask_round, "__builtin_ia32_cvttps2udqs512_mask_round", IX86_BUILTIN_VCVTTPS2UDQS512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttps2uqqsv4di_mask_round, "__builtin_ia32_cvttps2uqqs256_mask_round", IX86_BUILTIN_VCVTTPS2UQQS256_MASK_ROUND, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vcvttps2uqqsv8di_mask_round, "__builtin_ia32_cvttps2uqqs512_mask_round", IX86_BUILTIN_VCVTTPS2UQQS512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sissi_round, "__builtin_ia32_cvttsd2sis32_round", IX86_BUILTIN_VCVTTSD2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2sisdi_round, "__builtin_ia32_cvttsd2sis64_round", IX86_BUILTIN_VCVTTSD2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usissi_round, "__builtin_ia32_cvttsd2usis32_round", IX86_BUILTIN_VCVTTSD2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttsd2usisdi_round, "__builtin_ia32_cvttsd2usis64_round", IX86_BUILTIN_VCVTTSD2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V2DF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sissi_round, "__builtin_ia32_cvttss2sis32_round", IX86_BUILTIN_VCVTTSS2SIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2sisdi_round, "__builtin_ia32_cvttss2sis64_round", IX86_BUILTIN_VCVTTSS2SIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usissi_round, "__builtin_ia32_cvttss2usis32_round", IX86_BUILTIN_VCVTTSS2USIS32_ROUND, UNKNOWN, (int) INT_FTYPE_V4SF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vcvttss2usisdi_round, "__builtin_ia32_cvttss2usis64_round", IX86_BUILTIN_VCVTTSS2USIS64_ROUND, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
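
For orientation (an illustration, not part of the patch): each BDESC entry binds a builtin name to an insn pattern and a function-type descriptor. V8SI_FTYPE_V8DF_V8SI_QI_INT, for instance, describes a v8si result from a v8df source, a v8si merge operand, an 8-bit mask, and an int SAE immediate, which is exactly the shape the non-__OPTIMIZE__ macro path uses:

/* Hand-expanded form of _mm512_cvtts_roundpd_epi32 (a, 8), mirroring
   the macro in avx10_2-512satcvtintrin.h.  */
__m256i
expanded (__m512d a)
{
  return (__m256i)
    __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) a,
					     (__v8si) _mm256_undefined_si256 (),
					     (__mmask8) -1, 8);
}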
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 113902e..847ac09 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -247,6 +247,8 @@
UNSPEC_VCVTTPH2IUBS
UNSPEC_VCVTTPS2IBS
UNSPEC_VCVTTPS2IUBS
+ UNSPEC_SFIX_SATURATION
+ UNSPEC_UFIX_SATURATION
])
(define_c_enum "unspecv" [
@@ -375,6 +377,10 @@
(V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
(V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
+(define_mode_iterator VF1_VF2_AVX10_2
+ [(V16SF "TARGET_AVX10_2_512") V8SF V4SF
+ (V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
(define_mode_iterator VFH
[(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
@@ -423,6 +429,9 @@
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+(define_mode_iterator VF2_AVX10_2
+ [(V8DF "TARGET_AVX10_2_512") V4DF V2DF])
+
;; All DFmode & HFmode vector float modes
(define_mode_iterator VF2H
[(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
@@ -570,6 +579,9 @@
(define_mode_iterator VI8
[(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+(define_mode_iterator VI8_AVX10_2
+ [(V8DI "TARGET_AVX10_2_512") V4DI V2DI])
+
(define_mode_iterator VI8_FVL
[(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")])
@@ -32228,7 +32240,9 @@
(UNSPEC_VCVTPS2IBS "")
(UNSPEC_VCVTPS2IUBS "u")
(UNSPEC_VCVTTPS2IBS "")
- (UNSPEC_VCVTTPS2IUBS "u")])
+ (UNSPEC_VCVTTPS2IUBS "u")
+ (UNSPEC_SFIX_SATURATION "")
+ (UNSPEC_UFIX_SATURATION "u")])
(define_int_attr sat_cvt_trunc_prefix
[(UNSPEC_VCVTNEBF162IBS "")
@@ -32306,3 +32320,70 @@
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_int_iterator UNSPEC_SAT_CVT_DS_SIGN_ITER
+ [UNSPEC_SFIX_SATURATION
+ UNSPEC_UFIX_SATURATION])
+
+(define_mode_attr pd2dqssuff
+ [(V16SF "") (V8SF "") (V4SF "")
+ (V8DF "") (V4DF "{y}") (V2DF "{x}")])
+
+(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v")
+ (unspec:<VEC_GATHER_IDXSI>
+ [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v")
+ (unspec:<VEC_GATHER_IDXDI>
+ [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v")
+ (unspec:VI8_AVX10_2
+ [(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256 && <round_saeonly_mode_condition>"
+ "vcvttps2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx10_2_vcvttsd2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(vec_select:DF
+ (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256"
+ "vcvttsd2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx10_2_vcvttss2<sat_cvt_sign_prefix>sis<mode><round_saeonly_name>"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(vec_select:SF
+ (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_SAT_CVT_DS_SIGN_ITER))]
+ "TARGET_AVX10_2_256"
+ "vcvttss2<sat_cvt_sign_prefix>sis\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index be2fb5a..30c071a 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -1044,6 +1044,14 @@
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
@@ -1054,6 +1062,24 @@
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
#include <wmmintrin.h>
#include <immintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
index 84826c0..ecc356a 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-satcvt-1.c
@@ -36,12 +36,39 @@
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
+volatile __m256 hx;
+volatile __m256i hxi;
volatile __m512 x;
volatile __m512h xh;
volatile __m512i xi;
+volatile __m512d xd;
volatile __m512bh xbh;
volatile __mmask8 m8;
volatile __mmask16 m16;
@@ -97,4 +124,36 @@ avx10_2_test (void)
xi = _mm512_ipcvttnebf16_epu16 (xbh);
xi = _mm512_mask_ipcvttnebf16_epu16 (xi, m32, xbh);
xi = _mm512_maskz_ipcvttnebf16_epu16 (m32, xbh);
+
+ hxi = _mm512_cvtts_roundpd_epi32 (xd, 8);
+ hxi = _mm512_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8);
+ hxi = _mm512_maskz_cvtts_roundpd_epi32 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundpd_epi64 (xd, 8);
+ xi = _mm512_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8);
+ xi = _mm512_maskz_cvtts_roundpd_epi64 (m8, xd, 8);
+
+ hxi = _mm512_cvtts_roundpd_epu32 (xd, 8);
+ hxi = _mm512_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8);
+ hxi = _mm512_maskz_cvtts_roundpd_epu32 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundpd_epu64 (xd, 8);
+ xi = _mm512_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8);
+ xi = _mm512_maskz_cvtts_roundpd_epu64 (m8, xd, 8);
+
+ xi = _mm512_cvtts_roundps_epi32 (x, 8);
+ xi = _mm512_mask_cvtts_roundps_epi32 (xi, m16, x, 8);
+ xi = _mm512_maskz_cvtts_roundps_epi32 (m16, x, 8);
+
+ xi = _mm512_cvtts_roundps_epi64 (hx, 8);
+ xi = _mm512_mask_cvtts_roundps_epi64 (xi, m8, hx, 8);
+ xi = _mm512_maskz_cvtts_roundps_epi64 (m8, hx, 8);
+
+ xi = _mm512_cvtts_roundps_epu32 (x, 8);
+ xi = _mm512_mask_cvtts_roundps_epu32 (xi, m16, x, 8);
+ xi = _mm512_maskz_cvtts_roundps_epu32 (m16, x, 8);
+
+ xi = _mm512_cvtts_roundps_epu64 (hx, 8);
+ xi = _mm512_mask_cvtts_roundps_epu64 (xi, m8, hx, 8);
+ xi = _mm512_maskz_cvtts_roundps_epu64 (m8, hx, 8);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c
new file mode 100644
index 0000000..dd7ea88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2dqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > INT_MAX)
+ r[i] = INT_MAX;
+ else if (s[i] < INT_MIN)
+ r[i] = INT_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epi32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epi32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epi32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c
new file mode 100644
index 0000000..a286431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2qqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (double *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > LLONG_MAX)
+ r[i] = LLONG_MAX;
+ else if (s[i] < LLONG_MIN)
+ r[i] = LLONG_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epi64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epi64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epi64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epi64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c
new file mode 100644
index 0000000..7685677
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2udqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+
+static void
+CALC (double *s, unsigned int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > UINT_MAX)
+ r[i] = UINT_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN_HALF, i_ud) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epu32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epu32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epu32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c
new file mode 100644
index 0000000..dbdd811
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttpd2uqqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 64)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (double *s, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+      if (s[i] > ULLONG_MAX)
+	r[i] = ULLONG_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttspd_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttspd_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttspd_epu64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundpd_epu64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundpd_epu64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundpd_epu64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_uq) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_uq) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c
new file mode 100644
index 0000000..7a9b6e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2dqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 32)
+
+static void
+CALC (float *s, int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > INT_MAX)
+ r[i] = INT_MAX;
+ else if (s[i] < INT_MIN)
+ r[i] = INT_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epi32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epi32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epi32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epi32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epi32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epi32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c
new file mode 100644
index 0000000..ed19c5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2qqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ if (s[i] > LLONG_MAX)
+ r[i] = LLONG_MAX;
+ else if (s[i] < LLONG_MIN)
+ r[i] = LLONG_MIN;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, ) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epi64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epi64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epi64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epi64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c
new file mode 100644
index 0000000..b279af2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2udqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 32)
+
+static void
+CALC (float *s, unsigned int *r)
+{
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ if (s[i] > UINT_MAX)
+ r[i] = UINT_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s;
+ UNION_TYPE (AVX512F_LEN, i_ud) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned int res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epu32) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epu32) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epu32) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epu32) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epu32) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epu32) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_ud) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c
new file mode 100644
index 0000000..7151d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvttps2uqqs-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2-512" } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#ifndef AVX10_2
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#endif
+#include "avx10-helper.h"
+#include <limits.h>
+
+#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
+#define DST_SIZE ((AVX512F_LEN) / 64)
+
+static void
+CALC (float *s, unsigned long long *r)
+{
+ int i;
+
+  for (i = 0; i < DST_SIZE; i++)
+ {
+      if (s[i] > ULLONG_MAX)
+	r[i] = ULLONG_MAX;
+ else if (s[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, ) s;
+ UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[DST_SIZE] = { 0 };
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 1.23 * (i + 2) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+#if AVX512F_LEN == 128
+ res1.x = INTRINSIC (_cvttsps_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttsps_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttsps_epu64) (mask, s.x);
+#else
+ res1.x = INTRINSIC (_cvtts_roundps_epu64) (s.x, 8);
+ res2.x = INTRINSIC (_mask_cvtts_roundps_epu64) (res2.x, mask, s.x, 8);
+ res3.x = INTRINSIC (_maskz_cvtts_roundps_epu64) (mask, s.x, 8);
+#endif
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
+ abort ();
+
+  MASK_MERGE (i_uq) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
+ abort ();
+
+  MASK_ZERO (i_uq) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
index f04e3ec..83ef63c 100644
--- a/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-satcvt-1.c
@@ -72,19 +72,81 @@
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vcvttnebf162iubs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsy\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\{sae\}\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqsx\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqqs\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%e.x+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsd2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsd2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttss2sis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttss2usis\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%r.x+(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
#include <immintrin.h>
volatile __m128 hx;
volatile __m128i hxi;
volatile __m128h hxh;
+volatile __m128d hxd;
volatile __m128bh hxbh;
volatile __m256 x;
volatile __m256h xh;
volatile __m256i xi;
+volatile __m256d xd;
volatile __m256bh xbh;
volatile __mmask8 m8;
volatile __mmask16 m16;
+volatile int i;
+volatile unsigned int ui;
+volatile long long ll;
+volatile unsigned long long ull;
void extern
avx10_2_test (void)
@@ -184,4 +246,80 @@ avx10_2_test (void)
hxi = _mm_ipcvttnebf16_epu16 (hxbh);
hxi = _mm_mask_ipcvttnebf16_epu16 (hxi, m8, hxbh);
hxi = _mm_maskz_ipcvttnebf16_epu16 (m8, hxbh);
+
+ hxi = _mm256_cvtts_roundpd_epi32 (xd, 8);
+ hxi = _mm256_mask_cvtts_roundpd_epi32 (hxi, m8, xd, 8);
+ hxi = _mm256_maskz_cvtts_roundpd_epi32 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundpd_epi64 (xd, 8);
+ xi = _mm256_mask_cvtts_roundpd_epi64 (xi, m8, xd, 8);
+ xi = _mm256_maskz_cvtts_roundpd_epi64 (m8, xd, 8);
+
+ hxi = _mm256_cvtts_roundpd_epu32 (xd, 8);
+ hxi = _mm256_mask_cvtts_roundpd_epu32 (hxi, m8, xd, 8);
+ hxi = _mm256_maskz_cvtts_roundpd_epu32 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundpd_epu64 (xd, 8);
+ xi = _mm256_mask_cvtts_roundpd_epu64 (xi, m8, xd, 8);
+ xi = _mm256_maskz_cvtts_roundpd_epu64 (m8, xd, 8);
+
+ xi = _mm256_cvtts_roundps_epi32 (x, 8);
+ xi = _mm256_mask_cvtts_roundps_epi32 (xi, m16, x, 8);
+ xi = _mm256_maskz_cvtts_roundps_epi32 (m16, x, 8);
+
+ xi = _mm256_cvtts_roundps_epi64 (hx, 8);
+ xi = _mm256_mask_cvtts_roundps_epi64 (xi, m8, hx, 8);
+ xi = _mm256_maskz_cvtts_roundps_epi64 (m8, hx, 8);
+
+ xi = _mm256_cvtts_roundps_epu32 (x, 8);
+ xi = _mm256_mask_cvtts_roundps_epu32 (xi, m16, x, 8);
+ xi = _mm256_maskz_cvtts_roundps_epu32 (m16, x, 8);
+
+ xi = _mm256_cvtts_roundps_epu64 (hx, 8);
+ xi = _mm256_mask_cvtts_roundps_epu64 (xi, m8, hx, 8);
+ xi = _mm256_maskz_cvtts_roundps_epu64 (m8, hx, 8);
+
+ hxi = _mm_cvttspd_epi32 (hxd);
+ hxi = _mm_mask_cvttspd_epi32 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epi32 (m8, hxd);
+
+ hxi = _mm_cvttspd_epi64 (hxd);
+ hxi = _mm_mask_cvttspd_epi64 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epi64 (m8, hxd);
+
+ hxi = _mm_cvttspd_epu32 (hxd);
+ hxi = _mm_mask_cvttspd_epu32 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epu32 (m8, hxd);
+
+ hxi = _mm_cvttspd_epu64 (hxd);
+ hxi = _mm_mask_cvttspd_epu64 (hxi, m8, hxd);
+ hxi = _mm_maskz_cvttspd_epu64 (m8, hxd);
+
+ hxi = _mm_cvttsps_epi32 (hx);
+ hxi = _mm_mask_cvttsps_epi32 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epi32 (m8, hx);
+
+ hxi = _mm_cvttsps_epi64 (hx);
+ hxi = _mm_mask_cvttsps_epi64 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epi64 (m8, hx);
+
+ hxi = _mm_cvttsps_epu32 (hx);
+ hxi = _mm_mask_cvttsps_epu32 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epu32 (m8, hx);
+
+ hxi = _mm_cvttsps_epu64 (hx);
+ hxi = _mm_mask_cvttsps_epu64 (hxi, m8, hx);
+ hxi = _mm_maskz_cvttsps_epu64 (m8, hx);
+
+ i = _mm_cvtts_roundsd_epi32 (hxd, 8);
+ ui = _mm_cvtts_roundsd_epu32 (hxd, 8);
+ i = _mm_cvtts_roundss_epi32 (hx, 8);
+ ui = _mm_cvtts_roundss_epu32 (hx, 8);
+
+#ifdef __x86_64__
+ ll = _mm_cvtts_roundsd_epi64 (hxd, 8);
+ ull = _mm_cvtts_roundsd_epu64 (hxd, 8);
+ ll = _mm_cvtts_roundss_epi64 (hx, 8);
+ ull = _mm_cvtts_roundss_epu64 (hx, 8);
+#endif
}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c
new file mode 100644
index 0000000..06cbb5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2dqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2dqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2dqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c
new file mode 100644
index 0000000..df29d0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2qqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2qqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2qqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c
new file mode 100644
index 0000000..9e9cea1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2udqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2udqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2udqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c
new file mode 100644
index 0000000..282b43f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttpd2uqqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2uqqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttpd2uqqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c
new file mode 100644
index 0000000..57acd36
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2dqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2dqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2dqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c
new file mode 100644
index 0000000..1e6bbfd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2qqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2qqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2qqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c
new file mode 100644
index 0000000..4b175e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2udqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2udqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c
new file mode 100644
index 0000000..3abebfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttps2uqqs-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx10_2-512-vcvttps2uqqs-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c
new file mode 100644
index 0000000..9e4bd71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2sis-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, d) s;
+ int res1;
+ long long res2;
+ int res1_ref = 0;
+ long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundsd_epi32 (s.x, 8);
+
+ if (s.a[0] > INT_MAX)
+ res1_ref = INT_MAX;
+ else if (s.a[0] < INT_MIN)
+ res1_ref = INT_MIN;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+    abort ();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundsd_epi64 (s.x, 8);
+
+ if (s.a[0] > LLONG_MAX)
+ res2_ref = LLONG_MAX;
+ else if (s.a[0] < LLONG_MIN)
+ res2_ref = LLONG_MIN;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+    abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c
new file mode 100644
index 0000000..b4ab914
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttsd2usis-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, d) s;
+ unsigned int res1;
+ unsigned long long res2;
+ unsigned int res1_ref = 0;
+ unsigned long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundsd_epu32 (s.x, 8);
+
+ if (s.a[0] > UINT_MAX)
+ res1_ref = UINT_MAX;
+ else if (s.a[0] < 0)
+ res1_ref = 0;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+    abort ();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundsd_epu64 (s.x, 8);
+
+  if (s.a[0] > ULLONG_MAX)
+    res2_ref = ULLONG_MAX;
+ else if (s.a[0] < 0)
+ res2_ref = 0;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+    abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c
new file mode 100644
index 0000000..67b6b8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2sis-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, ) s;
+ int res1;
+ long long res2;
+ int res1_ref = 0;
+ long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundss_epi32 (s.x, 8);
+
+ if (s.a[0] > INT_MAX)
+ res1_ref = INT_MAX;
+ else if (s.a[0] < INT_MIN)
+ res1_ref = INT_MIN;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+    abort ();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundss_epi64 (s.x, 8);
+
+ if (s.a[0] > LLONG_MAX)
+ res2_ref = LLONG_MAX;
+ else if (s.a[0] < LLONG_MIN)
+ res2_ref = LLONG_MIN;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+    abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c
new file mode 100644
index 0000000..0734e95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcvttss2usis-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+#include "avx10-helper.h"
+#include <limits.h>
+
+void
+TEST (void)
+{
+ UNION_TYPE (128, ) s;
+ unsigned int res1;
+ unsigned long long res2;
+ unsigned int res1_ref = 0;
+ unsigned long long res2_ref = 0;
+
+ s.a[0] = 2.46;
+
+ res1 = _mm_cvtts_roundss_epu32 (s.x, 8);
+
+ if (s.a[0] > UINT_MAX)
+ res1_ref = UINT_MAX;
+ else if (s.a[0] < 0)
+ res1_ref = 0;
+ else
+ res1_ref = s.a[0];
+
+ if (res1 != res1_ref)
+    abort ();
+
+#ifdef __x86_64__
+ res2 = _mm_cvtts_roundss_epu64 (s.x, 8);
+
+  if (s.a[0] > ULLONG_MAX)
+    res2_ref = ULLONG_MAX;
+ else if (s.a[0] < 0)
+ res2_ref = 0;
+ else
+ res2_ref = s.a[0];
+
+ if (res2 != res2_ref)
+    abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 5669fa1..1d6ca55 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1052,6 +1052,14 @@
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)
/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
@@ -1062,5 +1070,23 @@
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 550d263..799982b 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1457,6 +1457,30 @@ test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h,
test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8)
+test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8)
/* avx10_2satcvtintrin.h */
test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8)
@@ -1483,3 +1507,37 @@ test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h,
test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
+#endif
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index ba67ee2..b8eb6ae 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -1496,6 +1496,30 @@ test_3 (_mm512_mask_ipcvtt_roundph_epi16, __m512i, __m512i, __mmask32, __m512h,
test_3 (_mm512_mask_ipcvtt_roundph_epu16, __m512i, __m512i, __mmask32, __m512h, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
test_3 (_mm512_mask_ipcvtt_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundpd_epi32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epi64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epi64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epi64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu32, __m256i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu32, __m256i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu32, __m256i, __m256i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundpd_epu64, __m512i, __m512d, 8)
+test_2 (_mm512_maskz_cvtts_roundpd_epu64, __m512i, __mmask8, __m512d, 8)
+test_3 (_mm512_mask_cvtts_roundpd_epu64, __m512i, __m512i, __mmask8, __m512d, 8)
+test_1 (_mm512_cvtts_roundps_epi32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epi64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epi64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epi64, __m512i, __m512i, __mmask8, __m256, 8)
+test_1 (_mm512_cvtts_roundps_epu32, __m512i, __m512, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu32, __m512i, __mmask16, __m512, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu32, __m512i, __m512i, __mmask16, __m512, 8)
+test_1 (_mm512_cvtts_roundps_epu64, __m512i, __m256, 8)
+test_2 (_mm512_maskz_cvtts_roundps_epu64, __m512i, __mmask8, __m256, 8)
+test_3 (_mm512_mask_cvtts_roundps_epu64, __m512i, __m512i, __mmask8, __m256, 8)

/* avx10_2satcvtintrin.h */
test_1 (_mm256_ipcvt_roundph_epi16, __m256i, __m256h, 8)
@@ -1522,3 +1546,37 @@ test_3 (_mm256_mask_ipcvtt_roundph_epi16, __m256i, __m256i, __mmask16, __m256h,
test_3 (_mm256_mask_ipcvtt_roundph_epu16, __m256i, __m256i, __mmask16, __m256h, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
test_3 (_mm256_mask_ipcvtt_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundpd_epi32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epi64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epi64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epi64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu32, __m128i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu32, __m128i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu32, __m128i, __m128i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundpd_epu64, __m256i, __m256d, 8)
+test_2 (_mm256_maskz_cvtts_roundpd_epu64, __m256i, __mmask8, __m256d, 8)
+test_3 (_mm256_mask_cvtts_roundpd_epu64, __m256i, __m256i, __mmask8, __m256d, 8)
+test_1 (_mm256_cvtts_roundps_epi32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epi64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epi64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epi64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm256_cvtts_roundps_epu32, __m256i, __m256, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu32, __m256i, __mmask8, __m256, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu32, __m256i, __m256i, __mmask8, __m256, 8)
+test_1 (_mm256_cvtts_roundps_epu64, __m256i, __m128, 8)
+test_2 (_mm256_maskz_cvtts_roundps_epu64, __m256i, __mmask8, __m128, 8)
+test_3 (_mm256_mask_cvtts_roundps_epu64, __m256i, __m256i, __mmask8, __m128, 8)
+test_1 (_mm_cvtts_roundsd_epi32, int, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu32, unsigned int, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi32, int, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu32, unsigned int, __m128, 8)
+#ifdef __x86_64__
+test_1 (_mm_cvtts_roundsd_epi64, long long, __m128d, 8)
+test_1 (_mm_cvtts_roundsd_epu64, unsigned long long, __m128d, 8)
+test_1 (_mm_cvtts_roundss_epi64, long long, __m128, 8)
+test_1 (_mm_cvtts_roundss_epu64, unsigned long long, __m128, 8)
+#endif
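
The same prototypes are re-checked here under sse-22.c's compile model. For the merge-masking forms, a call matching the test_3 signature would look roughly like the sketch below (function name illustrative only):

#include <immintrin.h>

/* Lanes with a zero bit in k keep the corresponding element of src;
   matches test_3 (_mm512_mask_cvtts_roundps_epi32, __m512i, __m512i,
   __mmask16, __m512, 8).  */
__m512i
use_masked_cvtts_ps32 (__m512i src, __mmask16 k, __m512 a)
{
  return _mm512_mask_cvtts_roundps_epi32 (src, k, a, 8);
}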
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 7e8b5d0..f3ab4a4 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -1026,6 +1026,14 @@
#define __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs512_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs512_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs512_mask_round(A, B, C, 8)

/* avx10_2satcvtintrin.h */
#define __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvtph2ibs256_mask_round(A, B, C, 8)
@@ -1036,6 +1044,24 @@
#define __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttph2iubs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2ibs256_mask_round(A, B, C, 8)
#define __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2iubs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttpd2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2dqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2qqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2udqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, D) __builtin_ia32_cvttps2uqqs256_mask_round(A, B, C, 8)
+#define __builtin_ia32_cvttsd2sis32_round(A, B) __builtin_ia32_cvttsd2sis32_round(A, 8)
+#define __builtin_ia32_cvttsd2usis32_round(A, B) __builtin_ia32_cvttsd2usis32_round(A, 8)
+#define __builtin_ia32_cvttss2sis32_round(A, B) __builtin_ia32_cvttss2sis32_round(A, 8)
+#define __builtin_ia32_cvttss2usis32_round(A, B) __builtin_ia32_cvttss2usis32_round(A, 8)
+#ifdef __x86_64__
+#define __builtin_ia32_cvttsd2sis64_round(A, B) __builtin_ia32_cvttsd2sis64_round(A, 8)
+#define __builtin_ia32_cvttsd2usis64_round(A, B) __builtin_ia32_cvttsd2usis64_round(A, 8)
+#define __builtin_ia32_cvttss2sis64_round(A, B) __builtin_ia32_cvttss2sis64_round(A, 8)
+#define __builtin_ia32_cvttss2usis64_round(A, B) __builtin_ia32_cvttss2usis64_round(A, 8)
+#endif

#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512")
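
The #define wrappers added above follow sse-23.c's usual trick: because a function-like macro is not expanded recursively inside its own replacement list, each wrapper rewrites any call so that the rounding operand becomes the literal 8, satisfying the builtins' requirement for a compile-time immediate. Schematically, on a made-up name (nothing below is from the patch):

/* Hypothetical stand-in for a builtin that needs an immediate.  */
static int my_builtin_round (int a, int r) { return a + r; }

/* Pin the second operand: the macro expands once and stops, so the
   callee always receives the constant 8.  */
#define my_builtin_round(A, B) my_builtin_round (A, 8)

int
demo (int x, int r)
{
  /* Expands to my_builtin_round (x, 8); r never reaches the call.  */
  return my_builtin_round (x, r);
}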