/* Copyright (C) 2024 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #ifndef _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif #ifndef _AVX10_2ROUNDINGINTRIN_H_INCLUDED #define _AVX10_2ROUNDINGINTRIN_H_INCLUDED #ifndef __AVX10_2_256__ #pragma GCC push_options #pragma GCC target("avx10.2-256") #define __DISABLE_AVX10_2_256__ #endif /* __AVX10_2_256__ */ #ifdef __OPTIMIZE__ extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_add_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_add_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_add_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_add_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_addps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_add_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_addps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_add_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_addps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_round_pd_mask (__m256d __A, __m256d __B, const int __C, const int __R) { return (__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) __A, (__v4df) __B, __C, (__mmask8) -1, __R); } extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_round_pd_mask (__mmask8 __U, __m256d __A, __m256d __B, const int __C, const int __R) { return (__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) __A, (__v4df) __B, __C, (__mmask8) __U, __R); } extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_round_ph_mask (__m256h __A, __m256h __B, const int __C, const int __R) { return (__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) __A, (__v16hf) __B, __C, (__mmask16) -1, __R); } extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_round_ph_mask (__mmask16 __U, __m256h __A, __m256h __B, const int __C, const int __R) { return (__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) __A, (__v16hf) __B, __C, (__mmask16) __U, __R); } extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_round_ps_mask (__m256 __A, __m256 __B, const int __C, const int __R) { return (__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) __A, (__v8sf) __B, __C, (__mmask8) -1, __R); } extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_round_ps_mask (__mmask8 __U, __m256 __A, __m256 __B, const int __C, const int __R) { return (__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) __A, (__v8sf) __B, __C, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi32_ph (__m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi32_ph (__m128h __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi32_ph (__mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi32_ps (__m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi32_ps (__m256 __W, __mmask8 __U, __m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi32_ps (__mmask8 __U, __m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_ph (__m256d __A, const int __R) { return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_ph (__m128h __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_ph (__mmask8 __U, __m256d __A, const int __R) { return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_ps (__m256d __A, const int __R) { return (__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) __A, (__v4sf) _mm_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_ps (__m128 __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) __A, (__v4sf) __W, (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_ps (__mmask8 __U, __m256d __A, const int __R) { return (__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_epi32 (__m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_epi64 (__m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_epu32 (__m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundpd_epu64 (__m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epi32 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_pd (__m128h __A, const int __R) { return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_pd (__m256d __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_pd (__mmask8 __U, __m128h __A, const int __R) { return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_ps (__m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_ps (__m256 __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_ps (__mmask8 __U, __m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtx_roundph_ps (__m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtx_roundph_ps (__m256 __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtx_roundph_ps (__mmask8 __U, __m128h __A, const int __R) { return (__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epi64 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epi64 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epi64 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epu32 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epu32 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epu32 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epu64 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epu64 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epu64 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epu16 (__m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A, (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundph_epi16 (__m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) __A, (__v16hi) _mm256_undefined_si256 (), (__mmask16) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundps_pd (__m128 __A, const int __R) { return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) __A, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundps_pd (__m256d __W, __mmask8 __U, __m128 __A, const int __R) { return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundps_pd (__mmask8 __U, __m128 __A, const int __R) { return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtx_roundps_ph (__m256 __A, const int __R) { return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtx_roundps_ph (__m128h __W, __mmask8 __U, __m256 __A, const int __R) { return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtx_roundps_ph (__mmask8 __U, __m256 __A, const int __R) { return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundps_epi32 (__m256 __A, const int __R) { return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundps_epi64 (__m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundps_epu32 (__m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundps_epu64 (__m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi64_pd (__m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi64_pd (__m256d __W, __mmask8 __U, __m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi64_pd (__mmask8 __U, __m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi64_ph (__m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi64_ph (__m128h __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi64_ph (__mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi64_ps (__m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi64_ps (__m128 __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) __A, (__v4sf) __W, (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi64_ps (__mmask8 __U, __m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundpd_epi32 (__m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundpd_epi64 (__m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundpd_epu32 (__m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundpd_epu64 (__m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) __A, (__v4di) \ _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epi32 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epi64 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epi64 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epi64 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epu32 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epu32 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epu32 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epu64 (__m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epu64 (__m256i __W, __mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epu64 (__mmask8 __U, __m128h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epu16 (__m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundph_epi16 (__m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R) { return (__m256i) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (), (__mmask16) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundps_epi32 (__m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundps_epi64 (__m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundps_epu32 (__m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtt_roundps_epu64 (__m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvtt_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R); } extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu32_ph (__m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu32_ph (__m128h __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu32_ph (__mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu32_ps (__m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu32_ps (__m256 __W, __mmask8 __U, __m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu32_ps (__mmask8 __U, __m256i __A, const int __R) { return (__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu64_pd (__m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu64_pd (__m256d __W, __mmask8 __U, __m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu64_pd (__mmask8 __U, __m256i __A, const int __R) { return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu64_ph (__m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu64_ph (__m128h __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) __A, (__v8hf) __W, (__mmask8) __U, __R); } extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu64_ph (__mmask8 __U, __m256i __A, const int __R) { return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu64_ps (__m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu64_ps (__m128 __W, __mmask8 __U, __m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) __A, (__v4sf) __W, (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu64_ps (__mmask8 __U, __m256i __A, const int __R) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepu16_ph (__m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepu16_ph (__m256h __W, __mmask16 __U, __m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepu16_ph (__mmask16 __U, __m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvt_roundepi16_ph (__m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cvt_roundepi16_ph (__m256h __W, __mmask16 __U, __m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_cvt_roundepi16_ph (__mmask16 __U, __m256i __A, const int __R) { return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_div_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_div_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_div_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_div_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_divps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_div_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_divps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_div_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_divps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfcmaddcph256_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fcmadd_round_pch (__m256h __A, __mmask8 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, __mmask8 __U, const int __R) { return (__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fcmadd_round_pch (__mmask8 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fcmul_round_pch (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) __A, (__v16hf) __B, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fcmul_round_pch (__m256h __W, __mmask8 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fcmul_round_pch (__mmask8 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fixupimm_round_pd (__m256d __A, __m256d __B, __m256i __D, const int __C, const int __R) { return (__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4di) __D, __C, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fixupimm_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256i __D, const int __C, const int __R) { return (__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4di) __D, __C, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fixupimm_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256i __D, const int __C, const int __R) { return (__m256d) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4di) __D, __C, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fixupimm_round_ps (__m256 __A, __m256 __B, __m256i __D, const int __C, const int __R) { return (__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8si) __D, __C, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fixupimm_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256i __D, const int __C, const int __R) { return (__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8si) __D, __C, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fixupimm_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256i __D, const int __C, const int __R) { return (__m256) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8si) __D, __C, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfmaddpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfmaddph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfmaddps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddcph256_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmadd_round_pch (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmadd_round_pch (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmaddsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmaddsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmaddsub_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmaddsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmaddsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmaddsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmaddsub_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfmaddsubph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmaddsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmaddsubph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmaddsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmaddsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmaddsub_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfmaddsubps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmaddsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmaddsubps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfmsubpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfmsubph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfmsubps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsubadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsubadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsubadd_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsubadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfmsubaddpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsubadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsubadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsubadd_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfmsubaddph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsubadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmsubaddph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsubadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmsubadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fmsubadd_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfmsubaddps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmsubadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfmsubaddps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmul_round_pch (__m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) __B, (__v16hf) __D, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fmul_round_pch (__m256h __A, __mmask8 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B, (__v16hf) __D, (__v16hf) __A, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fmul_round_pch (__mmask8 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B, (__v16hf) __D, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfnmaddpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmaddpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmaddph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfnmaddph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmaddph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfnmaddps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmaddps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, __mmask8 __U, const int __R) { return (__m256d) __builtin_ia32_vfnmsubpd256_mask3_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __D, const int __R) { return (__m256d) __builtin_ia32_vfnmsubpd256_maskz_round ((__v4df) __A, (__v4df) __B, (__v4df) __D, (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmsubph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, __mmask16 __U, const int __R) { return (__m256h) __builtin_ia32_vfnmsubph256_mask3_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B, __m256h __D, const int __R) { return (__m256h) __builtin_ia32_vfnmsubph256_maskz_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fnmsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask3_fnmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, __mmask8 __U, const int __R) { return (__m256) __builtin_ia32_vfnmsubps256_mask3_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __D, const int __R) { return (__m256) __builtin_ia32_vfnmsubps256_maskz_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __D, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_round_pd (__m256d __A, const int __R) { return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getexp_round_pd (__m256d __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getexp_round_pd (__mmask8 __U, __m256d __A, const int __R) { return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_round_ph (__m256h __A, const int __R) { return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getexp_round_ph (__m256h __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getexp_round_ph (__mmask16 __U, __m256h __A, const int __R) { return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_round_ps (__m256 __A, const int __R) { return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getexp_round_ps (__m256 __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getexp_round_ps (__mmask8 __U, __m256 __A, const int __R) { return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_round_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A, (__C << 2) | __B, _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getmant_round_pd (__m256d __W, __mmask8 __U, __m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A, (__C << 2) | __B, (__v4df) __W, __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getmant_round_pd (__mmask8 __U, __m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A, (__C << 2) | __B, (__v4df) _mm256_setzero_pd (), __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_round_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A, (__C << 2) | __B, _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getmant_round_ph (__m256h __W, __mmask16 __U, __m256h __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A, (__C << 2) | __B, (__v16hf) __W, __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getmant_round_ph (__mmask8 __U, __m256h __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A, (__C << 2) | __B, (__v16hf) _mm256_setzero_ph (), __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_round_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A, (__C << 2) | __B, _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_getmant_round_ps (__m256 __W, __mmask8 __U, __m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A, (__C << 2) | __B, (__v8sf) __W, __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_getmant_round_ps (__mmask8 __U, __m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C, const int __R) { return (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A, (__C << 2) | __B, (__v8sf) _mm256_setzero_ps (), __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_max_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_max_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_max_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_max_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_max_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_max_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_min_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_min_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_min_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_min_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_min_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_min_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_mul_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_mul_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_mul_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_mul_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_mul_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_mul_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_range_round_pd (__m256d __A, __m256d __B, const int __C, const int __R) { return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A, (__v4df) __B, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_range_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __C, const int __R) { return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A, (__v4df) __B, __C, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_range_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C, const int __R) { return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A, (__v4df) __B, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_range_round_ps (__m256 __A, __m256 __B, const int __C, const int __R) { return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A, (__v8sf) __B, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_range_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __C, const int __R) { return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A, (__v8sf) __B, __C, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_range_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C, const int __R) { return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A, (__v8sf) __B, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_round_pd (__m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_reduce_round_pd (__m256d __W, __mmask8 __U, __m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A, __C, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_reduce_round_pd (__mmask8 __U, __m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_round_ph (__m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A, __C, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_reduce_round_ph (__m256h __W, __mmask16 __U, __m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A, __C, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_reduce_round_ph (__mmask16 __U, __m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A, __C, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_round_ps (__m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_reduce_round_ps (__m256 __W, __mmask8 __U, __m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A, __C, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_reduce_round_ps (__mmask8 __U, __m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_round_pd (__m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A, __C, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_roundscale_round_pd (__m256d __W, __mmask8 __U, __m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A, __C, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_roundscale_round_pd (__mmask8 __U, __m256d __A, const int __C, const int __R) { return (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A, __C, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_round_ph (__m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A, __C, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_roundscale_round_ph (__m256h __W, __mmask16 __U, __m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A, __C, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_roundscale_round_ph (__mmask16 __U, __m256h __A, const int __C, const int __R) { return (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A, __C, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_round_ps (__m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A, __C, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_roundscale_round_ps (__m256 __W, __mmask8 __U, __m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A, __C, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_roundscale_round_ps (__mmask8 __U, __m256 __A, const int __C, const int __R) { return (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A, __C, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_scalef_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_scalef_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_scalef_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_scalef_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_scalef_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_scalef_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_round_pd (__m256d __A, const int __R) { return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sqrt_round_pd (__m256d __W, __mmask8 __U, __m256d __A, const int __R) { return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sqrt_round_pd (__mmask8 __U, __m256d __A, const int __R) { return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_round_ph (__m256h __A, const int __R) { return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sqrt_round_ph (__m256h __W, __mmask16 __U, __m256h __A, const int __R) { return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sqrt_round_ph (__mmask16 __U, __m256h __A, const int __R) { return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_round_ps (__m256 __A, const int __R) { return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sqrt_round_ps (__m256 __W, __mmask8 __U, __m256 __A, const int __R) { return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sqrt_round_ps (__mmask8 __U, __m256 __A, const int __R) { return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_round_pd (__m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sub_round_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R); } extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sub_round_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __R) { return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_round_ph (__m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (), (__mmask16) -1, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sub_round_ph (__m256h __W, __mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R); } extern __inline __m256h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sub_round_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __R) { return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_round_ps (__m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_sub_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R); } extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_sub_round_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __R) { return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R); } #else #define _mm256_add_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_add_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_add_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_add_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_add_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_add_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_add_round_ps(A, B, R) \ ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_add_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_add_round_ps(U, A, B, R)\ ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cmp_round_pd_mask(A, B, C, R) \ ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (C), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cmp_round_pd_mask(U, A, B, C, R) \ ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_cmp_round_ph_mask(A, B, C, R) \ ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (C), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cmp_round_ph_mask(U, A, B, C, R) \ ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (C), \ (__mmask16) (U), \ (R))) #define _mm256_cmp_round_ps_mask(A, B, C, R) \ ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (C), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cmp_round_ps_mask(U, A, B, C, R) \ ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepi32_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepi32_ps(A, R) \ ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \ ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \ ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_ps(A, R) \ ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \ (__v4sf) \ (_mm_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \ ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \ (__v4sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_ps(U, A, R) \ ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \ (__v4sf) \ (_mm_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_epi32(A, R) \ ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_undefined_si128 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \ ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \ (__v4si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_epi32(U, A, R)\ ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_setzero_si128 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_epi64(A, R) \ ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_epi64(U, A, R) \ ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_epu32(A, R) \ ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_undefined_si128 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \ ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \ (__v4si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \ ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_setzero_si128 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundpd_epu64(A, R) \ ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \ ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_epi32(A, R) \ ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()),\ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epi32(U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()),\ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_pd(A, R) \ ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_pd(U, A, R) \ ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_ps(A, R) \ ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_ps(W, U, A, R) \ ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_ps(U, A, R) \ ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtx_roundph_ps(A, R) \ ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \ ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtx_roundph_ps(U, A, R) \ ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_epi64(A, R) \ ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epi64(U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_epu32(A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epu32(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_epu64(A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epu64(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundph_epu16(A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_undefined_si256 ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epu16(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (U), \ (R))) #define _mm256_cvt_roundph_epi16(A, R) \ ((__m256i) \ __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_undefined_si256 ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \ (__v16hi) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvt_roundph_epi16(U, A, R) \ ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (U), \ (R))) #define _mm256_cvt_roundps_pd(A, R) \ ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundps_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundps_pd(U, A, R) \ ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtx_roundps_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtx_roundps_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundps_epi32(A, R) \ ((__m256i) \ __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_undefined_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundps_epi32(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundps_epi64(A, R) \ ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundps_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundps_epi64(U, A, R) \ ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundps_epu32(A, R) \ ((__m256i) \ __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_undefined_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \ ((__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundps_epu32(U, A, R) \ ((__m256i) \ __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundps_epu64(B, R) \ ((__m256i) \ __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (B), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundps_epu64(U, A, R) \ ((__m256i) \ __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepi64_pd(A, R) \ ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \ ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepi64_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepi64_ps(A, R) \ ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) \ (_mm_setzero_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepi64_ps(W, U, A, R) \ ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \ ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) \ (_mm_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundpd_epi32(A, R) \ ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_undefined_si128 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \ ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \ (__v4si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \ ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_setzero_si128 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundpd_epi64(A, R) \ ((__m256i) \ __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \ ((__m256i) \ __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundpd_epu32(A, R) \ ((__m128i) \ __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_undefined_si128 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \ ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \ (__v4si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \ ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \ (__v4si) \ (_mm_setzero_si128 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundpd_epu64(A, R) \ ((__m256i) \ __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundpd_epu64(U, A, R) \ ((__m256i) \ __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundph_epi32(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundph_epi64(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundph_epu32(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundph_epu64(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundph_epu16(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (U), \ (R))) #define _mm256_cvtt_roundph_epi16(A, R) \ ((__m256i) \ __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \ ((__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvtt_roundph_epi16(U, A, R)\ ((__m256i) \ __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \ (__v16hi) \ (_mm256_setzero_si256 ()), \ (__mmask16) (U), \ (R))) #define _mm256_cvtt_roundps_epi32(A, R) \ ((__m256i) \ __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_undefined_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \ ((__m256i) \ __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundps_epi64(A, R) \ ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \ ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()),\ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundps_epu32(A, R) \ ((__m256i) \ __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_undefined_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \ (__v8si) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \ ((__m256i) \ __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \ (__v8si) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvtt_roundps_epu64(A, R) \ ((__m256i) \ __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \ ((__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \ (__v4di) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \ ((__m256i) \ __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \ (__v4di) \ (_mm256_setzero_si256 ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu32_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu32_ps(A, R) \ ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \ ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \ ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu64_pd(A, R) \ ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \ ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu64_ph(A, R) \ ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \ ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \ ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \ (__v8hf) \ (_mm_setzero_ph ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu64_ps(A, R) \ ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) \ (_mm_setzero_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \ ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \ ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \ (__v4sf) \ (_mm_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_cvt_roundepu16_ph(A, R) \ ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \ ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \ ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_cvt_roundepi16_ph(A, R) \ ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \ ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \ ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_div_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_div_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_div_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_div_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_div_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_div_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_div_round_ps(A, B, R) \ ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_div_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_div_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_fcmadd_round_pch(A, B, D, R) \ (__m256h) __builtin_ia32_vfcmaddcph256_round ((A), (B), (D), (R)) #define _mm256_mask_fcmadd_round_pch(A, U, B, D, R) \ ((__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf)(A), \ (__v16hf)(B), \ (__v16hf)(D), \ (U), (R))) #define _mm256_mask3_fcmadd_round_pch(A, B, D, U, R) \ ((__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fcmadd_round_pch(U, A, B, D, R) \ ((__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fcmul_round_pch(A, B, R) \ ((__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) (A), \ (__v16hf) (B), \ (R))) #define _mm256_mask_fcmul_round_pch(W, U, A, B, R) \ ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_fcmul_round_pch(U, A, B, R) \ ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_fixupimm_round_pd(A, B, D, C, R) \ ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4di) (D), \ (C), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_fixupimm_round_pd(A, U, B, D, C, R)\ ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4di) (D), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_fixupimm_round_pd(U, A, B, D, C, R)\ ((__m256d) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df) (A), \ (__v4df) (B), \ (__v4di) (D), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_fixupimm_round_ps(A, B, D, C, R)\ ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8si) (D), \ (C), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_fixupimm_round_ps(A, U, B, D, C, R)\ ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8si) (D), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_fixupimm_round_ps(U, A, B, D, C, R)\ ((__m256) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8si) (D), \ (C), \ (__mmask8) (U), \ (R))) #define _mm256_fmadd_round_pd(A, B, D, R) \ ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, -1, R)) #define _mm256_mask_fmadd_round_pd(A, U, B, D, R) \ ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, U, R)) #define _mm256_mask3_fmadd_round_pd(A, B, D, U, R) \ ((__m256d) __builtin_ia32_vfmaddpd256_mask3_round (A, B, D, U, R)) #define _mm256_maskz_fmadd_round_pd(U, A, B, D, R) \ ((__m256d) __builtin_ia32_vfmaddpd256_maskz_round (A, B, D, U, R)) #define _mm256_fmadd_round_ph(A, B, D, R) \ ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, -1, R)) #define _mm256_mask_fmadd_round_ph(A, U, B, D, R) \ ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, U, R)) #define _mm256_mask3_fmadd_round_ph(A, B, D, U, R) \ ((__m256h) __builtin_ia32_vfmaddph256_mask3_round (A, B, D, U, R)) #define _mm256_maskz_fmadd_round_ph(U, A, B, D, R) \ ((__m256h) __builtin_ia32_vfmaddph256_maskz_round (A, B, D, U, R)) #define _mm256_fmadd_round_ps(A, B, D, R) \ ((__m256)__builtin_ia32_vfmaddps256_mask_round (A, B, D, -1, R)) #define _mm256_mask_fmadd_round_ps(A, U, B, D, R) \ ((__m256)__builtin_ia32_vfmaddps256_mask_round (A, B, D, U, R)) #define _mm256_mask3_fmadd_round_ps(A, B, D, U, R) \ ((__m256)__builtin_ia32_vfmaddps256_mask3_round (A, B, D, U, R)) #define _mm256_maskz_fmadd_round_ps(U, A, B, D, R) \ ((__m256)__builtin_ia32_vfmaddps256_maskz_round (A, B, D, U, R)) #define _mm256_fmadd_round_pch(A, B, D, R) \ (__m256h) __builtin_ia32_vfmaddcph256_round ((A), (B), (D), (R)) #define _mm256_mask_fmadd_round_pch(A, U, B, D, R) \ ((__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (D), \ (U), (R))) #define _mm256_mask3_fmadd_round_pch(A, B, D, U, R) \ (__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((A), (B), (D), (U), (R)) #define _mm256_maskz_fmadd_round_pch(U, A, B, D, R) \ (__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((A), (B), (D), (U), (R)) #define _mm256_fmaddsub_round_pd(A, B, D, R) \ (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmaddsub_round_pd(A, U, B, D, R) \ (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmaddsub_round_pd(A, B, D, U, R) \ (__m256d)__builtin_ia32_vfmaddsubpd256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmaddsub_round_pd(U, A, B, D, R) \ (__m256d)__builtin_ia32_vfmaddsubpd256_maskz_round (A, B, D, U, R) #define _mm256_fmaddsub_round_ph(A, B, D, R) \ ((__m256h)__builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), -1, (R))) #define _mm256_mask_fmaddsub_round_ph(A, U, B, D, R) \ ((__m256h)__builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), (U), (R))) #define _mm256_mask3_fmaddsub_round_ph(A, B, D, U, R) \ ((__m256h)__builtin_ia32_vfmaddsubph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fmaddsub_round_ph(U, A, B, D, R) \ ((__m256h)__builtin_ia32_vfmaddsubph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fmaddsub_round_ps(A, B, D, R) \ (__m256)__builtin_ia32_vfmaddsubps256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmaddsub_round_ps(A, U, B, D, R) \ (__m256)__builtin_ia32_vfmaddsubps256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmaddsub_round_ps(A, B, D, U, R) \ (__m256)__builtin_ia32_vfmaddsubps256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmaddsub_round_ps(U, A, B, D, R) \ (__m256)__builtin_ia32_vfmaddsubps256_maskz_round (A, B, D, U, R) #define _mm256_fmsub_round_pd(A, B, D, R) \ (__m256d)__builtin_ia32_vfmsubpd256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmsub_round_pd(A, U, B, D, R) \ (__m256d)__builtin_ia32_vfmsubpd256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmsub_round_pd(A, B, D, U, R) \ (__m256d)__builtin_ia32_vfmsubpd256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmsub_round_pd(U, A, B, D, R) \ (__m256d)__builtin_ia32_vfmsubpd256_maskz_round (A, B, D, U, R) #define _mm256_fmsub_round_ph(A, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), -1, (R))) #define _mm256_mask_fmsub_round_ph(A, U, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), (U), (R))) #define _mm256_mask3_fmsub_round_ph(A, B, D, U, R) \ ((__m256h)__builtin_ia32_vfmsubph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fmsub_round_ph(U, A, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fmsub_round_ps(A, B, D, R) \ (__m256)__builtin_ia32_vfmsubps256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmsub_round_ps(A, U, B, D, R) \ (__m256)__builtin_ia32_vfmsubps256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmsub_round_ps(A, B, D, U, R) \ (__m256)__builtin_ia32_vfmsubps256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmsub_round_ps(U, A, B, D, R) \ (__m256)__builtin_ia32_vfmsubps256_maskz_round (A, B, D, U, R) #define _mm256_fmsubadd_round_pd(A, B, D, R) \ (__m256d)__builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmsubadd_round_pd(A, U, B, D, R) \ (__m256d)__builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmsubadd_round_pd(A, B, D, U, R) \ (__m256d)__builtin_ia32_vfmsubaddpd256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmsubadd_round_pd(U, A, B, D, R) \ (__m256d)__builtin_ia32_vfmsubaddpd256_maskz_round (A, B, D, U, R) #define _mm256_fmsubadd_round_ph(A, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), -1, (R))) #define _mm256_mask_fmsubadd_round_ph(A, U, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), (U), (R))) #define _mm256_mask3_fmsubadd_round_ph(A, B, D, U, R) \ ((__m256h)__builtin_ia32_vfmsubaddph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fmsubadd_round_ph(U, A, B, D, R) \ ((__m256h)__builtin_ia32_vfmsubaddph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fmsubadd_round_ps(A, B, D, R) \ (__m256)__builtin_ia32_vfmsubaddps256_mask_round (A, B, D, -1, R) #define _mm256_mask_fmsubadd_round_ps(A, U, B, D, R) \ (__m256)__builtin_ia32_vfmsubaddps256_mask_round (A, B, D, U, R) #define _mm256_mask3_fmsubadd_round_ps(A, B, D, U, R) \ (__m256)__builtin_ia32_vfmsubaddps256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fmsubadd_round_ps(U, A, B, D, R) \ (__m256)__builtin_ia32_vfmsubaddps256_maskz_round (A, B, D, U, R) #define _mm256_fmul_round_pch(B, D, R) \ ((__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) (B), \ (__v16hf) (D), \ (R))) #define _mm256_mask_fmul_round_pch(A, U, B, D, R) \ ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \ (__v16hf) (D), \ (__v16hf) (A), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_fmul_round_pch(U, B, D, R) \ ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \ (__v16hf) (D), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_fnmadd_round_pd(A, B, D, R) \ (__m256d)__builtin_ia32_vfnmaddpd256_mask_round (A, B, D, -1, R) #define _mm256_mask_fnmadd_round_pd(A, U, B, D, R) \ (__m256d)__builtin_ia32_vfnmaddpd256_mask_round (A, B, D, U, R) #define _mm256_mask3_fnmadd_round_pd(A, B, D, U, R) \ (__m256d)__builtin_ia32_vfnmaddpd256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fnmadd_round_pd(U, A, B, D, R) \ (__m256d)__builtin_ia32_vfnmaddpd256_maskz_round (A, B, D, U, R) #define _mm256_fnmadd_round_ph(A, B, D, R) \ ((__m256h)__builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), -1, (R))) #define _mm256_mask_fnmadd_round_ph(A, U, B, D, R) \ ((__m256h)__builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), (U), (R))) #define _mm256_mask3_fnmadd_round_ph(A, B, D, U, R) \ ((__m256h)__builtin_ia32_vfnmaddph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fnmadd_round_ph(U, A, B, D, R) \ ((__m256h)__builtin_ia32_vfnmaddph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fnmadd_round_ps(A, B, D, R) \ (__m256)__builtin_ia32_vfnmaddps256_mask_round (A, B, D, -1, R) #define _mm256_mask_fnmadd_round_ps(A, U, B, D, R) \ (__m256)__builtin_ia32_vfnmaddps256_mask_round (A, B, D, U, R) #define _mm256_mask3_fnmadd_round_ps(A, B, D, U, R) \ (__m256)__builtin_ia32_vfnmaddps256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fnmadd_round_ps(U, A, B, D, R) \ (__m256)__builtin_ia32_vfnmaddps256_maskz_round (A, B, D, U, R) #define _mm256_fnmsub_round_pd(A, B, D, R) \ (__m256d)__builtin_ia32_vfnmsubpd256_mask_round (A, B, D, -1, R) #define _mm256_mask_fnmsub_round_pd(A, U, B, D, R) \ (__m256d)__builtin_ia32_vfnmsubpd256_mask_round (A, B, D, U, R) #define _mm256_mask3_fnmsub_round_pd(A, B, D, U, R) \ (__m256d)__builtin_ia32_vfnmsubpd256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fnmsub_round_pd(U, A, B, D, R) \ (__m256d)__builtin_ia32_vfnmsubpd256_maskz_round (A, B, D, U, R) #define _mm256_fnmsub_round_ph(A, B, D, R) \ ((__m256h)__builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), -1, (R))) #define _mm256_mask_fnmsub_round_ph(A, U, B, D, R) \ ((__m256h)__builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), (U), (R))) #define _mm256_mask3_fnmsub_round_ph(A, B, D, U, R) \ ((__m256h)__builtin_ia32_vfnmsubph256_mask3_round ((A), (B), (D), (U), (R))) #define _mm256_maskz_fnmsub_round_ph(U, A, B, D, R) \ ((__m256h)__builtin_ia32_vfnmsubph256_maskz_round ((A), (B), (D), (U), (R))) #define _mm256_fnmsub_round_ps(A, B, D, R) \ (__m256)__builtin_ia32_vfnmsubps256_mask_round (A, B, D, -1, R) #define _mm256_mask_fnmsub_round_ps(A, U, B, D, R) \ (__m256)__builtin_ia32_vfnmsubps256_mask_round (A, B, D, U, R) #define _mm256_mask3_fnmsub_round_ps(A, B, D, U, R) \ (__m256)__builtin_ia32_vfnmsubps256_mask3_round (A, B, D, U, R) #define _mm256_maskz_fnmsub_round_ps(U, A, B, D, R) \ (__m256)__builtin_ia32_vfnmsubps256_maskz_round (A, B, D, U, R) #define _mm256_getexp_round_pd(A, R) \ ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_getexp_round_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_getexp_round_pd(U, A, R) \ ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_getexp_round_ph(A, R)\ ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_getexp_round_ph(W, U, A, R)\ ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_getexp_round_ph(U, A, R)\ ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_getexp_round_ps(A, R)\ ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_getexp_round_ps(W, U, A, R)\ ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_getexp_round_ps(U, A, R)\ ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_getmant_round_pd(A, B, C, R) \ ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \ (int) (((C) << 2) | (B)), \ (__v4df) (__m256d) \ _mm256_undefined_pd (), \ (__mmask8)-1, \ (R))) #define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \ ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \ (int) (((C) << 2) | (B)), \ (__v4df) (__m256d) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \ ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \ (int) (((C) << 2) | (B)), \ (__v4df) (__m256d) \ _mm256_setzero_pd (), \ (__mmask8) (U), \ (R))) #define _mm256_getmant_round_ph(A, B, C, R) \ ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \ (int) (((C)<<2) | (B)), \ (__v16hf) (__m256h) \ _mm256_undefined_ph (), \ (__mmask16)-1, \ (R))) #define _mm256_mask_getmant_round_ph(W, U, A, B, C, R) \ ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \ (int) (((C)<<2) | (B)), \ (__v16hf) (__m256h) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \ ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \ (int) (((C)<<2) | (B)), \ (__v16hf) (__m256h) \ _mm256_setzero_ph (), \ (__mmask16) (U), \ (R))) #define _mm256_getmant_round_ps(A, B, C, R) \ ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \ (int) (((C)<<2) | (B)), \ (__v8sf) (__m256) \ _mm256_undefined_ps (), \ (__mmask8)-1, \ (R))) #define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \ ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \ (int) (((C)<<2) | (B)), \ (__v8sf) (__m256) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \ ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \ (int) (((C)<<2) | (B)), \ (__v8sf) (__m256) \ _mm256_setzero_ps (), \ (__mmask8) (U), \ (R))) #define _mm256_max_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_max_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_max_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_max_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_max_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_max_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_max_round_ps(A, B, R) \ ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_max_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_max_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_min_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_min_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_min_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_min_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_min_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_min_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_min_round_ps(A, B, R) \ ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_min_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_min_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_mul_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_mul_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_mul_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_mul_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_mul_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_mul_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_mul_round_ps(A, B, R) \ ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_mul_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_mul_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_range_round_pd(A, B, C, R) \ ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (C), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_range_round_pd(W, U, A, B, C, R) \ ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (C), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_range_round_pd(U, A, B, C, R) \ ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (C), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_range_round_ps(A, B, C, R) \ ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (C), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_range_round_ps(W, U, A, B, C, R) \ ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (C), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_range_round_ps(U, A, B, C, R) \ ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (C), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_reduce_round_pd(A, C, R) \ ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_reduce_round_pd(W, U, A, C, R) \ ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_reduce_round_pd(U, A, C, R) \ ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_reduce_round_ph(A, C, R) \ ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_reduce_round_ph(W, U, A, C, R) \ ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_reduce_round_ph(U, A, C, R) \ ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_reduce_round_ps(A, C, R) \ ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_reduce_round_ps(W, U, A, C, R) \ ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_reduce_round_ps(U, A, C, R) \ ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_roundscale_round_pd(A, C, R) \ ((__m256d) \ __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_roundscale_round_pd(W, U, A, C, R) \ ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_roundscale_round_pd(U, A, C, R) \ ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \ (C), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_roundscale_round_ph(A, C, R) \ ((__m256h) \ __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_roundscale_round_ph(W, U, A, C, R) \ ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_roundscale_round_ph(U, A, C, R) \ ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \ (C), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_roundscale_round_ps(A, C, R) \ ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_roundscale_round_ps(W, U, A, C, R) \ ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_roundscale_round_ps(U, A, C, R) \ ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \ (C), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_scalef_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_scalef_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_scalef_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_scalef_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_scalef_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_scalef_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_scalef_round_ps(A, B, R) \ ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_scalef_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_scalef_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_sqrt_round_pd(A, R) \ ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_sqrt_round_pd(W, U, A, R) \ ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_sqrt_round_pd(U, A, R) \ ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_sqrt_round_ph(A, R) \ ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_sqrt_round_ph(W, U, A, R) \ ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_sqrt_round_ph(U, A, R) \ ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_sqrt_round_ps(A, R) \ ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_sqrt_round_ps(W, U, A, R) \ ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_sqrt_round_ps(U, A, R) \ ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #define _mm256_sub_round_pd(A, B, R) \ ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_undefined_pd ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_sub_round_pd(W, U, A, B, R) \ ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_sub_round_pd(U, A, B, R) \ ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \ (__v4df) (B), \ (__v4df) \ (_mm256_setzero_pd ()), \ (__mmask8) (U), \ (R))) #define _mm256_sub_round_ph(A, B, R) \ ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_undefined_ph ()), \ (__mmask16) (-1), \ (R))) #define _mm256_mask_sub_round_ph(W, U, A, B, R) \ ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) (W), \ (__mmask16) (U), \ (R))) #define _mm256_maskz_sub_round_ph(U, A, B, R) \ ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \ (__v16hf) (B), \ (__v16hf) \ (_mm256_setzero_ph ()), \ (__mmask16) (U), \ (R))) #define _mm256_sub_round_ps(A, B, R) \ ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_undefined_ps ()), \ (__mmask8) (-1), \ (R))) #define _mm256_mask_sub_round_ps(W, U, A, B, R) \ ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) (W), \ (__mmask8) (U), \ (R))) #define _mm256_maskz_sub_round_ps(U, A, B, R) \ ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \ (__v8sf) (B), \ (__v8sf) \ (_mm256_setzero_ps ()), \ (__mmask8) (U), \ (R))) #endif #define _mm256_cmul_round_pch(A, B, R) _mm256_fcmul_round_pch ((A), (B), (R)) #define _mm256_mask_cmul_round_pch(W, U, A, B, R) \ _mm256_mask_fcmul_round_pch ((W), (U), (A), (B), (R)) #define _mm256_maskz_cmul_round_pch(U, A, B, R) \ _mm256_maskz_fcmul_round_pch ((U), (A), (B), (R)) #define _mm256_mul_round_pch(A, B, R) _mm256_fmul_round_pch ((A), (B), (R)) #define _mm256_mask_mul_round_pch(W, U, A, B, R) \ _mm256_mask_fmul_round_pch ((W), (U), (A), (B), (R)) #define _mm256_maskz_mul_round_pch(U, A, B, R) \ _mm256_maskz_fmul_round_pch ((U), (A), (B), (R)) #ifdef __DISABLE_AVX10_2_256__ #undef __DISABLE_AVX10_2_256__ #pragma GCC pop_options #endif /* __DISABLE_AVX10_2_256__ */ #endif /* _AVX10_2ROUNDINGINTRIN_H_INCLUDED */