Diffstat (limited to 'gcc/config/i386')
63 files changed, 8551 insertions, 6596 deletions
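For orientation, here is a minimal, illustrative sketch (not part of the diff) of how the 512-bit AVX10.2 BF16 intrinsics declared in the removed avx10_2-512bf16intrin.h shown below were typically called. The intrinsic and type names are taken from the deleted header; the wrapper function `scaled_sum` and the `-mavx10.2` compile option are assumptions for the example, and the merge/zero masking behaviour described in the comments follows the usual AVX-512 mask convention reflected in the `_mask`/`_maskz` builtins.

/* Illustrative sketch only; assumes a compiler that still ships these
   declarations in <immintrin.h> and is invoked with -mavx10.2.  */
#include <immintrin.h>

__m512bh
scaled_sum (__m512bh a, __m512bh b, __m512bh src, __mmask32 m)
{
  /* Unmasked add: every BF16 lane of the result is a[i] + b[i].  */
  __m512bh sum = _mm512_add_pbh (a, b);

  /* Merge-masked add: lanes whose mask bit is clear keep the value
     from `src'; lanes whose mask bit is set receive a[i] + b[i].  */
  __m512bh merged = _mm512_mask_add_pbh (src, m, a, b);

  /* Zero-masked multiply: lanes whose mask bit is clear become 0.0.  */
  return _mm512_maskz_mul_pbh (m, sum, merged);
}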
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h deleted file mode 100644 index 21e4b36..0000000 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ /dev/null @@ -1,681 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED -#define _AVX10_2_512BF16INTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_add_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sub_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mul_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); -} - -extern 
__inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_div_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_max_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_min_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_scalef_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern 
__inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmsub_pbh 
(__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rsqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); - -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rcp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getexp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -/* Intrinsics vrndscalebf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_roundscale_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_roundscale_pbh(A, B) \ - (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_roundscale_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_roundscale_pbh(A, B, C) \ - (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vreducebf16. */ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_reduce_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_reduce_pbh(A, B) \ - (__builtin_ia32_reducebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_reduce_pbh(A, B, C, D) \ - (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_reduce_pbh(A, B, C) \ - (__builtin_ia32_reducebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vgetmantbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_getmant_pbh(A, B, C) \ - (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) - -#define _mm512_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vfpclassbf16. */ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) - -#define _mm512_fpclass_pbh_mask(X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) -#endif /* __OPIMTIZE__ */ - - -/* Intrinsics vcmpbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) - -#define _mm512_cmp_pbh_mask(A, B, C) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) - -#endif /* __OPIMTIZE__ */ - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h deleted file mode 100644 index 611a40d..0000000 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ /dev/null @@ -1,572 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead." 
-#endif // _IMMINTRIN_H_INCLUDED - -#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED -#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED - -#ifndef __AVX10_2__ -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - __R); -} - -#else -#define _mm512_cvtx_round2ps_ph(A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (-1), \ - (R))) -#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) (W), \ - (__mmask32) (U), \ - (R))) -#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (U), \ - (R))) -#endif /* __OPTIMIZE__ */ - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - 
(__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, - __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - 
_mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_cvthf8_ph (__m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_undefined_ph (), - (__mmask32) -1); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) __W, - (__mmask32) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_setzero_ph (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - 
_mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbf8_ph (__m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( - (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); -} - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h deleted file mode 100644 index 43271e7..0000000 --- a/gcc/config/i386/avx10_2-512mediaintrin.h +++ /dev/null @@ -1,514 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead." 
-#endif - -#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED -#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED - -#if !defined(__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) -1); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, - (__v64qi) __Y, - __M); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) __W, - __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) _mm512_setzero_epi32 (), - __U); -} -#else -#define _mm512_mpsadbw_epu8(X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), (int)(M)) - -#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)) - -#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi) _mm512_setzero_epi32 (), \ - (__mmask32)(U)) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h deleted file mode 100644 index a743346..0000000 --- a/gcc/config/i386/avx10_2-512minmaxintrin.h +++ /dev/null @@ -1,489 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of GCC. - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED -#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -#ifdef __OPTIMIZE__ -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf) __W, - (__mmask32) __U); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, - const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C, const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) 
__W, - (__mmask8) __U, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C, const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, - 
_MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) __W, - (__mmask16) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, __R); -} - -#else -#define _mm512_minmax_pbh(A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (-1))) - -#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) (W), \ - (__mmask32) (U))) - -#define _mm512_maskz_minmax_pbh(U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (U))) - -#define _mm512_minmax_round_pd(A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ph(A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ps(A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) 
(W), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_minmax_pd(A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_pd(W, U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_pd(U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ph(A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ph(W, U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ph(U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ps(A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ps(W, U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) (W), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ps(U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h deleted file mode 100644 index 215b7fd..0000000 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ /dev/null @@ -1,1575 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED -#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epu8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epu8 (__m512bh __A) -{ - return (__m512i) - 
__builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return (__m512i) - __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epi8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epu8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epi8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epu8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epi8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epu8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epi8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epu8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) 
__U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi64 (__m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu64 (__m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, 
- (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epi8 (__m512h __A, const int 
__R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - 
_mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, - const int 
__R) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - 
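
[Illustrative aside, not part of the patch: the saturating truncating conversions defined by this (removed) header follow the usual unmasked / merge-masked / zero-masked pattern, with the _round variants taking an explicit SAE/rounding argument. A minimal usage sketch under the assumption that a GCC build exposing these AVX10.2 intrinsics through <immintrin.h> and an AVX10.2-enabled target option and CPU are available; the intrinsic names and signatures are taken from the definitions above, everything else is hypothetical.]

/* Sketch: convert 8 doubles to 8 saturated int32 lanes, unmasked and
   merge-masked.  Assumes AVX10.2 support in compiler and hardware.  */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  __m512d v = _mm512_set1_pd (3.0e10);   /* out of int32 range */
  int32_t out[8];

  /* Unmasked: every lane gets the converted (saturating) result;
     an out-of-range input is expected to clamp to INT32_MAX rather
     than produce the AVX-512 integer-indefinite value.  */
  __m256i r = _mm512_cvtts_roundpd_epi32 (v, _MM_FROUND_CUR_DIRECTION);
  _mm256_storeu_si256 ((__m256i *) out, r);
  printf ("unmasked lane 0 = %d\n", out[0]);

  /* Merge-masked: lanes whose mask bit is 0 keep the value from the
     pass-through operand instead of the converted result.  */
  __m256i pass = _mm256_set1_epi32 (-1);
  __m256i rm = _mm512_mask_cvtts_roundpd_epi32 (pass, (__mmask8) 0x0f, v,
						_MM_FROUND_CUR_DIRECTION);
  _mm256_storeu_si256 ((__m256i *) out, rm);
  printf ("masked lane 0 = %d, lane 7 = %d\n", out[0], out[7]);
  return 0;
}

[The maskz variants shown above behave the same way except that unselected lanes are zeroed instead of merged.  End of aside; the diff resumes below.]
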
-extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} -#else -#define _mm512_ipcvts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define 
_mm512_ipcvtts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round 
((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index e6890fc..9560480 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -34,6 +34,32 @@ #define __DISABLE_AVX10_2__ #endif /* __AVX10_2__ */ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_pbh (__m512bh 
__W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_pbh (__m256bh __A, __m256bh __B) @@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_pbh (__m256bh __A, __m256bh __B) @@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_pbh (__m256bh __A, __m256bh __B) @@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_pbh (__m256bh __A, __m256bh __B) @@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, 
__m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_pbh (__m256bh __A, __m256bh __B) @@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_pbh (__m256bh __A, __m256bh __B) @@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_pbh (__m256bh __A, __m256bh __B) @@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); + +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rsqrt_pbh (__m256bh __A) @@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_pbh (__m256bh __A) @@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp_pbh 
(__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rcp_pbh (__m256bh __A) @@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_pbh (__m256bh __A) @@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A) /* Intrinsics vrndscalebf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_pbh (__m256bh __A, int B) @@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) } #else +#define _mm512_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) + +#define _mm512_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_roundscale_pbh(A, B) \ (__builtin_ia32_rndscalebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vreducebf16. 
*/ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_pbh (__m256bh __A, int B) @@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) } #else +#define _mm512_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_reduce_pbh(A, B, C, D) \ + (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) + +#define _mm512_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_reduce_pbh(A, B) \ (__builtin_ia32_reducebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vgetmantbf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B, @@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, } #else +#define _mm512_getmant_pbh(A, B, C) \ + (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ + (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + +#define _mm512_maskz_getmant_pbh(A, B, C, D) \ + (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_getmant_pbh(A, B, C) \ (__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \ (__v16bf) 
_mm256_setzero_si256 (), \ @@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, /* Intrinsics vfpclassbf16. */ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) } #else +#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) + +#define _mm512_fpclass_pbh_mask(X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) + #define _mm256_mask_fpclass_pbh_mask(U, A, B) \ ((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U))) @@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) /* Intrinsics vcmpbf16. */ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm) } #else +#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) + +#define _mm512_cmp_pbh_mask(A, B, C) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) + #define _mm256_mask_cmp_pbh_mask(A, B, C, D) \ ((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A))) diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index 8cbdc66..f2fb98f 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + 
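For illustration, a minimal usage sketch of the new 512-bit vcvt2ps2phx intrinsics added in this hunk; it assumes a compiler built with this patch that accepts -mavx10.2, and the function and buffer names below are placeholders, not anything taken from the patch itself:

#include <immintrin.h>

/* Pack two vectors of 16 floats into one vector of 32 _Float16
   values, zeroing the lanes cleared in KEEP.  Illustrative only.  */
void
convert_pairs (const float *a, const float *b, _Float16 *out, __mmask32 keep)
{
  __m512 va = _mm512_loadu_ps (a);
  __m512 vb = _mm512_loadu_ps (b);
  __m512h vh = _mm512_maskz_cvtx2ps_ph (keep, va, vb);
  _mm512_storeu_ph (out, vh);
}

The masked and maskz variants follow the usual AVX-512 convention: the mask selects which destination lanes are written, with the remaining lanes taken from the passthrough operand or zeroed, respectively.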
+extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + __R); +} + +#else +#define _mm512_cvtx_round2ps_ph(A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (-1), \ + (R))) +#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) (W), \ + (__mmask32) (U), \ + (R))) +#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (U), \ + (R))) +#endif /* __OPTIMIZE__ */ + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_bf8 (__m128i __A, __m128h __B) @@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_bf8 (__m128i __A, __m128h __B) @@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_hf8 (__m128i __A, __m128h __B) @@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, + __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_hf8 (__m128i __A, __m128h __B) @@ -350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_bf8 (__m128h __A, __m128h __B) @@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 
(__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_bf8 (__m128h __A, __m128h __B) @@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_hf8 (__m128h __A, __m128h __B) @@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_hf8 (__m128h __A, __m128h __B) @@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) 
(__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvthf8_ph (__m128i __A) @@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A) (__mmask16) __U); } +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvthf8_ph (__m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_undefined_ph (), + (__mmask32) -1); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) __W, + (__mmask32) __U); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_setzero_ph (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_bf8 (__m128h __A) @@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_ph_bf8 (__m128h __A) @@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + 
_mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_hf8 (__m128h __A) @@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_ph_hf8 (__m128h __A) @@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbf8_ph (__m128i __A) @@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A) (__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8)); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbf8_ph (__m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( + (__m512i) __S, __U, 
(__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + #ifdef __DISABLE_AVX10_2__ #undef __DISABLE_AVX10_2__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h index 0993e8e..7d30502 100644 --- a/gcc/config/i386/avx10_2mediaintrin.h +++ b/gcc/config/i386/avx10_2mediaintrin.h @@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + 
__builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U, @@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + 
(__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + 
(__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, + __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, + __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B) @@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), __U); } + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, + (__v64qi) __Y, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) __W, + __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) _mm512_setzero_epi32 (), + __U); +} #else #define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \ (__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \ @@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), \ (__mmask16)(U)) +#define _mm512_mpsadbw_epu8(X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), (int)(M)) + +#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi)(__m512i)(W), \ + (__mmask32)(U)) + +#define 
_mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi) _mm512_setzero_epi32 (), \ + (__mmask32)(U)) #endif #ifdef __DISABLE_AVX10_2__ diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h index 0a4a253..f9fe14e 100644 --- a/gcc/config/i386/avx10_2minmaxintrin.h +++ b/gcc/config/i386/avx10_2minmaxintrin.h @@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A, (__mmask16) __U); } +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf) __W, + (__mmask32) __U); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_pd (__m128d __A, __m128d __B, const int __C) @@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C) (__mmask8) __U); } +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, + const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C, const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + 
(__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C, const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ph (__m128h __A, __m128h __B, const int __C) @@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ps (__m128 __A, __m128 __B, const int __C) @@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C) (__mmask8) __U); } +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 
__B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_sd (__m128d __A, __m128d __B, const int __C) @@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_si256 (), \ (__mmask16) (U))) +#define _mm512_minmax_pbh(A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (-1))) + +#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) (W), \ + (__mmask32) (U))) + +#define _mm512_maskz_minmax_pbh(U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (U))) + #define _mm_minmax_pd(A, B, C) \ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \ (__v2df) (B), \ @@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_pd (), \ (__mmask8) (U))) +#define _mm512_minmax_pd(A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_pd(W, U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_pd(U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_pd(A, B, C, R) \ + ((__m512d) 
__builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + (int) (R))) + #define _mm_minmax_ph(A, B, C) \ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \ (__v8hf) (B), \ @@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ph (), \ (__mmask16) (U))) +#define _mm512_minmax_ph(A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ph(W, U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ph(U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ph(A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + (int) (R))) + #define _mm_minmax_ps(A, B, C) \ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \ (__v4sf) (B), \ @@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ps (), \ (__mmask8) (U))) +#define _mm512_minmax_ps(A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ps(W, U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ps(U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ps(W, U, 
A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + (int) (R))) + #define _mm_minmax_round_sd(A, B, C, R) \ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \ (__v2df) (B), \ diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h index 78bcd72..c4fa19b 100644 --- a/gcc/config/i386/avx10_2satcvtintrin.h +++ b/gcc/config/i386/avx10_2satcvtintrin.h @@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A) (__mmask8) __U); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ipcvts_bf16_epi8 (__m256bh __A) -{ - return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_undefined_si256 (), - (__mmask16) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) -{ - return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) -{ - return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_setzero_si256 (), - (__mmask16) __U); -} - extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ipcvts_bf16_epu8 (__m128bh __A) @@ -127,6 +96,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvts_bf16_epi8 (__m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ipcvts_bf16_epu8 (__m256bh __A) { return @@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A) (__mmask16) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epi8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i 
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epu8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epi8 (__m128 __A) +_mm512_ipcvts_bf16_epi8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) +_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epu8 (__m128 __A) +_mm512_ipcvts_bf16_epu8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 
__A) +_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } extern __inline __m128i @@ -390,6 +336,183 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A) (__mmask16) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_bf16_epi8 (__m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_bf16_epu8 (__m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ph_epi8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ph_epu8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ps_epi8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ps_epu8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ipcvtts_ph_epi8 (__m128h __A) @@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A) (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epi8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epu8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epi8 (__m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epu8 (__m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epi8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epu8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + 
+extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epi8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epu8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + 
_mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epu32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return 
(__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epu64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epi8 (__m512i 
__W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + 
(__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + 
(__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} +#else +#define _mm512_ipcvts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define 
_mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m256i) \ + 
__builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) 
(U), \ + (R))) + +#define _mm512_cvtts_roundps_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) +#endif + extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtts_sd_epi32 (__m128d __A) diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h index 6740109..6c087e6 100644 --- a/gcc/config/i386/avx512bf16intrin.h +++ b/gcc/config/i386/avx512bf16intrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BF16INTRIN_H_INCLUDED #define _AVX512BF16INTRIN_H_INCLUDED -#if !defined (__AVX512BF16__) || defined (__EVEX512__) +#if !defined (__AVX512BF16__) #pragma GCC push_options -#pragma GCC target("avx512bf16,no-evex512") +#pragma GCC target("avx512bf16") #define __DISABLE_AVX512BF16__ #endif /* __AVX512BF16__ */ @@ -42,17 +42,6 @@ _mm_cvtsbh_ss (__bf16 __A) return __builtin_ia32_cvtbf2sf (__A); } -#ifdef __DISABLE_AVX512BF16__ -#undef __DISABLE_AVX512BF16__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BF16__ */ - -#if !defined (__AVX512BF16__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512bf16,evex512") -#define __DISABLE_AVX512BF16_512__ -#endif /* __AVX512BF16_512__ */ - /* Internal data types for implementing the intrinsics. */ typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); @@ -155,8 +144,8 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16))); } -#ifdef __DISABLE_AVX512BF16_512__ -#undef __DISABLE_AVX512BF16_512__ +#ifdef __DISABLE_AVX512BF16__ +#undef __DISABLE_AVX512BF16__ #pragma GCC pop_options #endif /* __DISABLE_AVX512BF16_512__ */ diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h index ffaceac..fd6d183 100644 --- a/gcc/config/i386/avx512bf16vlintrin.h +++ b/gcc/config/i386/avx512bf16vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BF16VLINTRIN_H_INCLUDED #define _AVX512BF16VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) #pragma GCC push_options -#pragma GCC target("avx512bf16,avx512vl,no-evex512") +#pragma GCC target("avx512bf16,avx512vl") #define __DISABLE_AVX512BF16VL__ #endif /* __AVX512BF16__ */ diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h index 301f125..d7156f9 100644 --- a/gcc/config/i386/avx512bitalgintrin.h +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BITALGINTRIN_H_INCLUDED #define _AVX512BITALGINTRIN_H_INCLUDED -#if !defined (__AVX512BITALG__) || !defined (__EVEX512__) +#if !defined (__AVX512BITALG__) #pragma GCC push_options -#pragma GCC target("avx512bitalg,evex512") +#pragma GCC target("avx512bitalg") #define __DISABLE_AVX512BITALG__ #endif /* __AVX512BITALG__ */ diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h index e4883cf..cf9cff6 100644 --- a/gcc/config/i386/avx512bitalgvlintrin.h +++ b/gcc/config/i386/avx512bitalgvlintrin.h @@ -28,9 +28,9 @@ #ifndef 
_AVX512BITALGVLINTRIN_H_INCLUDED #define _AVX512BITALGVLINTRIN_H_INCLUDED -#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || defined (__EVEX512__) +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512vl,no-evex512") +#pragma GCC target("avx512bitalg,avx512vl") #define __DISABLE_AVX512BITALGVL__ #endif /* __AVX512BITALGVL__ */ diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index 47c4c03..5e9eeaa 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BWINTRIN_H_INCLUDED #define _AVX512BWINTRIN_H_INCLUDED -#if !defined (__AVX512BW__) || defined (__EVEX512__) +#if !defined (__AVX512BW__) #pragma GCC push_options -#pragma GCC target("avx512bw,no-evex512") +#pragma GCC target("avx512bw") #define __DISABLE_AVX512BW__ #endif /* __AVX512BW__ */ @@ -346,17 +346,6 @@ _kandn_mask64 (__mmask64 __A, __mmask64 __B) return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B); } -#ifdef __DISABLE_AVX512BW__ -#undef __DISABLE_AVX512BW__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BW__ */ - -#if !defined (__AVX512BW__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512bw,evex512") -#define __DISABLE_AVX512BW_512__ -#endif /* __AVX512BW_512__ */ - /* Internal data types for implementing the intrinsics. */ typedef short __v32hi __attribute__ ((__vector_size__ (64))); typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ @@ -3369,8 +3358,8 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) #endif -#ifdef __DISABLE_AVX512BW_512__ -#undef __DISABLE_AVX512BW_512__ +#ifdef __DISABLE_AVX512BW__ +#undef __DISABLE_AVX512BW__ #pragma GCC pop_options #endif /* __DISABLE_AVX512BW_512__ */ diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h index 206cc49..5a92d25 100644 --- a/gcc/config/i386/avx512cdintrin.h +++ b/gcc/config/i386/avx512cdintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX512CD__ #pragma GCC push_options -#pragma GCC target("avx512cd,evex512") +#pragma GCC target("avx512cd") #define __DISABLE_AVX512CD__ #endif /* __AVX512CD__ */ diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 1d10225..a7766b5 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512DQINTRIN_H_INCLUDED #define _AVX512DQINTRIN_H_INCLUDED -#if !defined (__AVX512DQ__) || defined (__EVEX512__) +#if !defined (__AVX512DQ__) #pragma GCC push_options -#pragma GCC target("avx512dq,no-evex512") +#pragma GCC target("avx512dq") #define __DISABLE_AVX512DQ__ #endif /* __AVX512DQ__ */ @@ -639,17 +639,6 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) #endif -#ifdef __DISABLE_AVX512DQ__ -#undef __DISABLE_AVX512DQ__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512DQ__ */ - -#if !defined (__AVX512DQ__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512dq,evex512") -#define __DISABLE_AVX512DQ_512__ -#endif /* __AVX512DQ_512__ */ - extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f64x2 (__m128d __A) @@ -2897,9 +2886,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #endif -#ifdef __DISABLE_AVX512DQ_512__ -#undef __DISABLE_AVX512DQ_512__ +#ifdef __DISABLE_AVX512DQ__ +#undef __DISABLE_AVX512DQ__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512DQ_512__ */ +#endif /* 
__DISABLE_AVX512DQ__ */ #endif /* _AVX512DQINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 9160787..4469f73 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512FINTRIN_H_INCLUDED #define _AVX512FINTRIN_H_INCLUDED -#if !defined (__AVX512F__) || defined (__EVEX512__) +#if !defined (__AVX512F__) #pragma GCC push_options -#pragma GCC target("avx512f,no-evex512") +#pragma GCC target("avx512f") #define __DISABLE_AVX512F__ #endif /* __AVX512F__ */ @@ -54,11 +54,12 @@ typedef enum _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ } _MM_MANTISSA_SIGN_ENUM; -/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms - from AVX2 or before. We need to add them to prevent target option mismatch - when calling AVX512 intrins implemented with these intrins under no-evex512 - function attribute. All AVX512 intrins calling those AVX2 intrins or - before will change their calls to these AVX512 version. */ +/* These _mm{,256}_avx512* intrins are initially duplicated from their + _mm{,256}_* forms from AVX2 or before. At that time, we needed to add them + to prevent target option mismatch when calling AVX512 intrins implemented + with these intrins under no-evex512 function attribute. These intrins will + still be here to avoid huge changes. All AVX512 intrins calling those AVX2 + intrins or before have changed their calls to these AVX512 versions. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avx512_undefined_ps (void) { @@ -3802,17 +3803,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) #endif -#ifdef __DISABLE_AVX512F__ -#undef __DISABLE_AVX512F__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512F__ */ - -#if !defined (__AVX512F__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512f,evex512") -#define __DISABLE_AVX512F_512__ -#endif /* __AVX512F_512__ */ - /* Internal data types for implementing the intrinsics.
*/ typedef double __v8df __attribute__ ((__vector_size__ (64))); typedef float __v16sf __attribute__ ((__vector_size__ (64))); @@ -16609,9 +16599,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A) #undef __MM512_REDUCE_OP -#ifdef __DISABLE_AVX512F_512__ -#undef __DISABLE_AVX512F_512__ +#ifdef __DISABLE_AVX512F__ +#undef __DISABLE_AVX512F__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512F_512__ */ +#endif /* __DISABLE_AVX512F__ */ #endif /* _AVX512FINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index f158f87..471ec05 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512FP16INTRIN_H_INCLUDED #define _AVX512FP16INTRIN_H_INCLUDED -#if !defined (__AVX512FP16__) || defined (__EVEX512__) +#if !defined (__AVX512FP16__) #pragma GCC push_options -#pragma GCC target("avx512fp16,no-evex512") +#pragma GCC target("avx512fp16") #define __DISABLE_AVX512FP16__ #endif /* __AVX512FP16__ */ @@ -2852,17 +2852,6 @@ _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E) #define _mm_maskz_cmul_round_sch(U, A, B, R) \ _mm_maskz_fcmul_round_sch ((U), (A), (B), (R)) -#ifdef __DISABLE_AVX512FP16__ -#undef __DISABLE_AVX512FP16__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512FP16__ */ - -#if !defined (__AVX512FP16__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512fp16,evex512") -#define __DISABLE_AVX512FP16_512__ -#endif /* __AVX512FP16_512__ */ - typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \ @@ -7238,9 +7227,9 @@ _mm512_set1_pch (_Float16 _Complex __A) #define _mm512_maskz_cmul_round_pch(U, A, B, R) \ _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R)) -#ifdef __DISABLE_AVX512FP16_512__ -#undef __DISABLE_AVX512FP16_512__ +#ifdef __DISABLE_AVX512FP16__ +#undef __DISABLE_AVX512FP16__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512FP16_512__ */ +#endif /* __DISABLE_AVX512FP16__ */ #endif /* _AVX512FP16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 59e6c88..cb98310 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512FP16VLINTRIN_H_INCLUDED #define __AVX512FP16VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) #pragma GCC push_options -#pragma GCC target("avx512fp16,avx512vl,no-evex512") +#pragma GCC target("avx512fp16,avx512vl") #define __DISABLE_AVX512FP16VL__ #endif /* __AVX512FP16VL__ */ diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h index ed97350..56790c0 100644 --- a/gcc/config/i386/avx512ifmaintrin.h +++ b/gcc/config/i386/avx512ifmaintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512IFMAINTRIN_H_INCLUDED #define _AVX512IFMAINTRIN_H_INCLUDED -#if !defined (__AVX512IFMA__) || !defined (__EVEX512__) +#if !defined (__AVX512IFMA__) #pragma GCC push_options -#pragma GCC target("avx512ifma,evex512") +#pragma GCC target("avx512ifma") #define __DISABLE_AVX512IFMA__ #endif /* __AVX512IFMA__ */ diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h index 681bda3..6b849c8 100644 --- a/gcc/config/i386/avx512ifmavlintrin.h +++ 
b/gcc/config/i386/avx512ifmavlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512IFMAVLINTRIN_H_INCLUDED #define _AVX512IFMAVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) #pragma GCC push_options -#pragma GCC target("avx512ifma,avx512vl,no-evex512") +#pragma GCC target("avx512ifma,avx512vl") #define __DISABLE_AVX512IFMAVL__ #endif /* __AVX512IFMAVL__ */ diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h index f5515a8..e8bfe1d 100644 --- a/gcc/config/i386/avx512vbmi2intrin.h +++ b/gcc/config/i386/avx512vbmi2intrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VBMI2INTRIN_H_INCLUDED #define __AVX512VBMI2INTRIN_H_INCLUDED -#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__) +#if !defined(__AVX512VBMI2__) #pragma GCC push_options -#pragma GCC target("avx512vbmi2,evex512") +#pragma GCC target("avx512vbmi2") #define __DISABLE_AVX512VBMI2__ #endif /* __AVX512VBMI2__ */ diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h index e9857ba..5cdfebd 100644 --- a/gcc/config/i386/avx512vbmi2vlintrin.h +++ b/gcc/config/i386/avx512vbmi2vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED #define _AVX512VBMI2VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) #pragma GCC push_options -#pragma GCC target("avx512vbmi2,avx512vl,no-evex512") +#pragma GCC target("avx512vbmi2,avx512vl") #define __DISABLE_AVX512VBMI2VL__ #endif /* __AVX512VBMIVL__ */ diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h index 901a2f7..5f5e342 100644 --- a/gcc/config/i386/avx512vbmiintrin.h +++ b/gcc/config/i386/avx512vbmiintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMIINTRIN_H_INCLUDED #define _AVX512VBMIINTRIN_H_INCLUDED -#if !defined (__AVX512VBMI__) || !defined (__EVEX512__) +#if !defined (__AVX512VBMI__) #pragma GCC push_options -#pragma GCC target("avx512vbmi,evex512") +#pragma GCC target("avx512vbmi") #define __DISABLE_AVX512VBMI__ #endif /* __AVX512VBMI__ */ diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h index 90cd590..037ea93 100644 --- a/gcc/config/i386/avx512vbmivlintrin.h +++ b/gcc/config/i386/avx512vbmivlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMIVLINTRIN_H_INCLUDED #define _AVX512VBMIVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) #pragma GCC push_options -#pragma GCC target("avx512vbmi,avx512vl,no-evex512") +#pragma GCC target("avx512vbmi,avx512vl") #define __DISABLE_AVX512VBMIVL__ #endif /* __AVX512VBMIVL__ */ diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 9f0a5b4..537e408 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLBWINTRIN_H_INCLUDED #define _AVX512VLBWINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512BW__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512bw,no-evex512") +#pragma GCC target("avx512vl,avx512bw") #define __DISABLE_AVX512VLBW__ #endif /* __AVX512VLBW__ */ diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index 3b23d4a..5783dbe 100644 --- a/gcc/config/i386/avx512vldqintrin.h 
+++ b/gcc/config/i386/avx512vldqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLDQINTRIN_H_INCLUDED #define _AVX512VLDQINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512dq,no-evex512") +#pragma GCC target("avx512vl,avx512dq") #define __DISABLE_AVX512VLDQ__ #endif /* __AVX512VLDQ__ */ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 4451a1f..50930cd 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLINTRIN_H_INCLUDED #define _AVX512VLINTRIN_H_INCLUDED -#if !defined (__AVX512VL__) || defined (__EVEX512__) +#if !defined (__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vl,no-evex512") +#pragma GCC target("avx512vl") #define __DISABLE_AVX512VL__ #endif /* __AVX512VL__ */ @@ -13650,7 +13650,7 @@ _mm256_permutex_pd (__m256d __X, const int __M) #if !defined (__AVX512CD__) || !defined (__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512cd,no-evex512") +#pragma GCC target("avx512vl,avx512cd") #define __DISABLE_AVX512VLCD__ #endif diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h index 5d0eaff..fe7b663 100644 --- a/gcc/config/i386/avx512vnniintrin.h +++ b/gcc/config/i386/avx512vnniintrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VNNIINTRIN_H_INCLUDED #define __AVX512VNNIINTRIN_H_INCLUDED -#if !defined(__AVX512VNNI__) || !defined (__EVEX512__) +#if !defined(__AVX512VNNI__) #pragma GCC push_options -#pragma GCC target("avx512vnni,evex512") +#pragma GCC target("avx512vnni") #define __DISABLE_AVX512VNNI__ #endif /* __AVX512VNNI__ */ diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h index 7774bbd..01c3c91 100644 --- a/gcc/config/i386/avx512vnnivlintrin.h +++ b/gcc/config/i386/avx512vnnivlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VNNIVLINTRIN_H_INCLUDED #define _AVX512VNNIVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) #pragma GCC push_options -#pragma GCC target("avx512vnni,avx512vl,no-evex512") +#pragma GCC target("avx512vnni,avx512vl") #define __DISABLE_AVX512VNNIVL__ #endif /* __AVX512VNNIVL__ */ diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h index e170cf5..50f7ead 100644 --- a/gcc/config/i386/avx512vp2intersectintrin.h +++ b/gcc/config/i386/avx512vp2intersectintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED #define _AVX512VP2INTERSECTINTRIN_H_INCLUDED -#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__) +#if !defined(__AVX512VP2INTERSECT__) #pragma GCC push_options -#pragma GCC target("avx512vp2intersect,evex512") +#pragma GCC target("avx512vp2intersect") #define __DISABLE_AVX512VP2INTERSECT__ #endif /* __AVX512VP2INTERSECT__ */ diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h index afdd2da..3e0a8ab 100644 --- a/gcc/config/i386/avx512vp2intersectvlintrin.h +++ b/gcc/config/i386/avx512vp2intersectvlintrin.h @@ -28,10 +28,9 @@ #ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED #define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED -#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) \ - || defined (__EVEX512__) +#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) #pragma GCC 
push_options -#pragma GCC target("avx512vp2intersect,avx512vl,no-evex512") +#pragma GCC target("avx512vp2intersect,avx512vl") #define __DISABLE_AVX512VP2INTERSECTVL__ #endif /* __AVX512VP2INTERSECTVL__ */ diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h index 3357255..e4b89ea 100644 --- a/gcc/config/i386/avx512vpopcntdqintrin.h +++ b/gcc/config/i386/avx512vpopcntdqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQINTRIN_H_INCLUDED -#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__) +#if !defined (__AVX512VPOPCNTDQ__) #pragma GCC push_options -#pragma GCC target("avx512vpopcntdq,evex512") +#pragma GCC target("avx512vpopcntdq") #define __DISABLE_AVX512VPOPCNTDQ__ #endif /* __AVX512VPOPCNTDQ__ */ diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h index 17d836f..8eb1d42 100644 --- a/gcc/config/i386/avx512vpopcntdqvlintrin.h +++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h @@ -28,10 +28,9 @@ #ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED -#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) \ - || defined (__EVEX512__) +#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vpopcntdq,avx512vl,no-evex512") +#pragma GCC target("avx512vpopcntdq,avx512vl") #define __DISABLE_AVX512VPOPCNTDQVL__ #endif /* __AVX512VPOPCNTDQVL__ */ diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h index 3ddcbec..0a3173c 100644 --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_SEH #define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables) +#undef PREFERRED_STACK_BOUNDARY_DEFAULT +#define PREFERRED_STACK_BOUNDARY_DEFAULT \ + (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY) + /* Win64 with SEH cannot represent DRAP stack frames. Disable its use. Force the use of different mechanisms to allocate aligned local data. */ #undef MAX_STACK_ALIGNMENT #define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT) -/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions - may require 16-byte alignment. */ -#undef STACK_REALIGN_DEFAULT -#define STACK_REALIGN_DEFAULT TARGET_SSE - /* Support hooks for SEH. */ #undef TARGET_ASM_UNWIND_EMIT #define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit @@ -247,9 +246,10 @@ do { \ #undef ASM_OUTPUT_LABELREF #define ASM_OUTPUT_LABELREF(STREAM, NAME) \ do { \ + const char *prefix = ""; \ if ((NAME)[0] != FASTCALL_PREFIX) \ - fputs (user_label_prefix, (STREAM)); \ - fputs ((NAME), (STREAM)); \ + prefix = user_label_prefix; \ + ix86_asm_output_labelref ((STREAM), prefix, (NAME)); \ } while (0) /* This does much the same in memory rather than to a stream. */ diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 1ff05e5..fe71f55 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -374,33 +374,6 @@ detect_caches_intel (bool xeon_mp, unsigned max_level, #define has_feature(f) \ has_cpu_feature (&cpu_model, cpu_features2, f) -/* We will emit a warning when using AVX10.1 and AVX512 options with one - enabled and the other disabled. Add this function to avoid push "-mno-" - options under this scenario for -march=native. 
*/ - -bool check_avx512_features (__processor_model &cpu_model, - unsigned int (&cpu_features2)[SIZE_OF_CPU_FEATURES], - const enum processor_features feature) -{ - if (has_feature (FEATURE_AVX10_1_256) - && ((feature == FEATURE_AVX512F) - || (feature == FEATURE_AVX512CD) - || (feature == FEATURE_AVX512DQ) - || (feature == FEATURE_AVX512BW) - || (feature == FEATURE_AVX512VL) - || (feature == FEATURE_AVX512IFMA) - || (feature == FEATURE_AVX512VBMI) - || (feature == FEATURE_AVX512VBMI2) - || (feature == FEATURE_AVX512VNNI) - || (feature == FEATURE_AVX512VPOPCNTDQ) - || (feature == FEATURE_AVX512BITALG) - || (feature == FEATURE_AVX512FP16) - || (feature == FEATURE_AVX512BF16))) - return false; - - return true; -} - /* This will be called by the spec parser in gcc.cc when it sees a %:local_cpu_detect(args) construct. Currently it will be called with either "arch [32|64]" or "tune [32|64]" as argument @@ -627,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (has_feature (FEATURE_AVX512F)) { /* Assume Diamond Rapids. */ - if (has_feature (FEATURE_AMX_TRANSPOSE)) + if (has_feature (FEATURE_AMX_FP8)) cpu = "diamondrapids"; /* Assume Granite Rapids D. */ else if (has_feature (FEATURE_AMX_COMPLEX)) @@ -909,12 +882,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) options = concat (options, " ", isa_names_table[i].option, NULL); } - /* Never push -mno-avx10.1-{256,512} under -march=native to - avoid unnecessary warnings when building libraries. */ - else if (isa_names_table[i].feature != FEATURE_AVX10_1_256 - && isa_names_table[i].feature != FEATURE_AVX10_1 - && check_avx512_features (cpu_model, cpu_features2, - isa_names_table[i].feature)) + else options = concat (options, neg_option, isa_names_table[i].option + 2, NULL); } diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile index 528b34e..0e9e5fe 100755 --- a/gcc/config/i386/gcc-auto-profile +++ b/gcc/config/i386/gcc-auto-profile @@ -24,8 +24,16 @@ if [ "$1" = "--all" ] ; then shift fi -if ! grep -q Intel /proc/cpuinfo ; then - echo >&2 "Only Intel CPUs supported" +if grep -q AuthenticAMD /proc/cpuinfo ; then + vendor=AMD + if ! grep -q " brs" /proc/cpuinfo && ! 
grep -q amd_lbr_v2 /proc/cpuinfo ; then + echo >&2 "AMD CPU with brs (Zen 3) or amd_lbr_v2 (Zen 4+) feature is required" + exit 1 + fi +elif grep -q Intel /proc/cpuinfo ; then + vendor=Intel +else + echo >&2 "Only AMD and Intel CPUs supported" exit 1 fi @@ -33,7 +41,7 @@ if grep -q hypervisor /proc/cpuinfo ; then echo >&2 "Warning: branch profiling may not be functional in VMs" fi -case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo && +case `test $vendor = Intel && grep -E -q "^cpu family\s*: 6" /proc/cpuinfo && grep -E "^model\s*:" /proc/cpuinfo | head -n1` in model*:\ 46|\ model*:\ 30|\ @@ -82,6 +90,8 @@ model*:\ 126|\ model*:\ 167|\ model*:\ 140|\ model*:\ 141|\ +model*:\ 143|\ +model*:\ 207|\ model*:\ 106|\ model*:\ 108|\ model*:\ 173|\ @@ -89,15 +99,20 @@ model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;; model*:\ 134|\ model*:\ 150|\ model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;; -model*:\ 143|\ -model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;; -model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;; +model*:\ 190|\ +model*:\ 175|\ +model*:\ 182) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;; model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;; *) if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; then E=br_inst_retired.near_taken:p + elif perf list ex_ret_brn_tkn | grep -q ex_ret_brn_tkn ; then + E=ex_ret_brn_tkn:P$FLAGS + elif $vendor = Intel ; then +echo >&2 "Unknown Intel CPU. Run contrib/gen_autofdo_event.py --all --script to update script." + exit 1 else -echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script." +echo >&2 "AMD CPU without support for ex_ret_brn_tkn event" exit 1 fi ;; esac diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h index c7e21e6..bc433c2 100644 --- a/gcc/config/i386/gfniintrin.h +++ b/gcc/config/i386/gfniintrin.h @@ -297,9 +297,9 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B, #pragma GCC pop_options #endif /* __GFNIAVX512VLBW__ */ -#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__) +#if !defined(__GFNI__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("gfni,avx512f,evex512") +#pragma GCC target("gfni,avx512f") #define __DISABLE_GFNIAVX512F__ #endif /* __GFNIAVX512F__ */ @@ -341,9 +341,9 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) #pragma GCC pop_options #endif /* __GFNIAVX512F__ */ -#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__) +#if !defined(__GFNI__) || !defined(__AVX512BW__) #pragma GCC push_options -#pragma GCC target("gfni,avx512bw,evex512") +#pragma GCC target("gfni,avx512bw") #define __DISABLE_GFNIAVX512FBW__ #endif /* __GFNIAVX512FBW__ */ diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc index e083f49..87804a5 100644 --- a/gcc/config/i386/host-mingw32.cc +++ b/gcc/config/i386/host-mingw32.cc @@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd, and earlier, backslashes are invalid in object name. So, we need to check if we are on Windows2000 or higher. */ OSVERSIONINFO version_info; - int r; version_info.dwOSVersionInfoSize = sizeof (version_info); @@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd, return -1; } - /* Retry five times, as here might occure a race with multiple gcc's - instances at same time. 
*/ - for (r = 0; r < 5; r++) - { - mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, - size, addr); - if (mmap_addr == addr) - break; - if (r != 4) - Sleep (500); - } - - if (mmap_addr != addr) + /* Try mapping the file at `addr`. */ + mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, + size, addr); + if (mmap_addr == NULL) { - w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx"); - CloseHandle(mmap_handle); - return -1; + /* We could not map the file at its original address, so let the + system choose a different one. The PCH can be relocated later. */ + mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, + size, NULL); + if (mmap_addr == NULL) + { + w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx"); + CloseHandle(mmap_handle); + return -1; + } } + addr = mmap_addr; return 1; } diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index a142711..fe42c6436 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -204,53 +204,53 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstored256, "__builtin_ia32_mas BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI) /* AVX512F */ -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, 
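[Editor's illustration, not part of the patch above: the host-mingw32.cc hunk replaces the retry loop with "map the PCH at its preferred base address, otherwise let Windows choose one and relocate the PCH later". The sketch below condenses that new control flow under the assumption of a standalone helper (map_pch_view is a hypothetical name); error reporting and handle cleanup are elided.]

#include <windows.h>

/* Map a copy-on-write view of the PCH file mapping.  Prefer the address
   the PCH was built for; if that fails, accept a system-chosen address
   and rely on later relocation of the PCH contents.  */
static void *
map_pch_view (HANDLE mmap_handle, size_t size, DWORD offset, void *preferred)
{
  void *p = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
			     size, preferred);
  if (p == NULL)
    /* Preferred address unavailable; take whatever the system gives us.  */
    p = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, size, NULL);
  return p;
}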
"__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, 
"__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) -BDESC 
(OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, 
UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI) @@ -297,14 +297,14 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si, BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT) /* AVX512BW */ -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) 
VOID_FTYPE_PCHAR_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI) /* AVX512VP2INTERSECT */ -BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI) -BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI) +BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI) @@ -411,9 +411,9 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, 
(int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) /* AVX512FP16 */ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI) @@ -434,17 +434,17 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, 
(int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) @@ -1384,230 +1384,230 @@ BDESC (OPTION_MASK_ISA_BMI2, 0, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si" BDESC (OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64) /* AVX512F */ -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, 
OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF) +BDESC 
(OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, 
"__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv16si3_mask, 
"__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", 
IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) 
V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, 
(int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC 
(OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", 
IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) 
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, 
"__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, 
"__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, 
UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) 
V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) 
V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", 
IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 
0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", 
IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", 
IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) /* Mask arithmetic operations */ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST) @@ -2433,136 +2433,136 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI) /* AVX512DQ. 
*/ -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask_1, 
"__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, 
CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI) /* AVX512BW. */ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv64qi_mask, 
"__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", 
IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", 
IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", 
IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv64qi_mask, 
"__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, 
CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) /* AVX512IFMA */ -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, 
(int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2577,13 +2577,13 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) /* AVX512VBMI */ -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512,
CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) @@ -2594,16 +2594,16 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512 BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, 
(int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) @@ -2612,64 +2612,64 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", 
IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | 
OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi_mask, "__builtin_ia32_vpshld_v8hi_mask", IX86_BUILTIN_VPSHLDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si, "__builtin_ia32_vpshld_v8si", IX86_BUILTIN_VPSHLDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si_mask, "__builtin_ia32_vpshld_v8si_mask", IX86_BUILTIN_VPSHLDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si, 
"__builtin_ia32_vpshld_v4si", IX86_BUILTIN_VPSHLDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si_mask, "__builtin_ia32_vpshld_v4si_mask", IX86_BUILTIN_VPSHLDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di, "__builtin_ia32_vpshld_v4di", IX86_BUILTIN_VPSHLDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di_mask, "__builtin_ia32_vpshld_v4di_mask", IX86_BUILTIN_VPSHLDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di, "__builtin_ia32_vpshld_v2di", IX86_BUILTIN_VPSHLDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, 
UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", 
IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2677,27 +2677,27 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC 
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2706,20 +2706,20 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) /* GFNI */ -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineinvqb_v32qi, "__builtin_ia32_vgf2p8affineinvqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v32qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineinvqb_v16qi, "__builtin_ia32_vgf2p8affineinvqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineinvqb_v16qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, 
UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineqb_v32qi, "__builtin_ia32_vgf2p8affineqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v32qi_mask, "__builtin_ia32_vgf2p8affineqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineqb_v16qi, "__builtin_ia32_vgf2p8affineqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineqb_v16qi_mask, "__builtin_ia32_vgf2p8affineqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8mulb_v32qi, "__builtin_ia32_vgf2p8mulb_v32qi", IX86_BUILTIN_VGF2P8MULB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v32qi_mask, "__builtin_ia32_vgf2p8mulb_v32qi_mask", IX86_BUILTIN_VGF2P8MULB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) @@ -2727,9 +2727,9 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v /* AVX512_VNNI */ -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", 
IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2737,9 +2737,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", 
IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2747,9 +2747,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2757,9 +2757,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) 
V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2798,13 +2798,13 @@ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16 | OPTION_MASK_ISA2_AVX10_2, CODE_FOR_vpd /* VPCLMULQDQ */ BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) -BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) /* VPOPCNTDQ */ -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC 
(OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI) @@ -2816,21 +2816,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI) /* BITALG */ -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", 
IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI) @@ -2840,39 +2840,39 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B /* VAES. */ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, 
OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) /* BF16 */ -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf, "__builtin_ia32_cvtne2ps2bf16_v16bf", IX86_BUILTIN_CVTNE2PS2BF16_V16BF, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_mask, "__builtin_ia32_cvtne2ps2bf16_v16bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASK, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_V16BF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v16bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf, "__builtin_ia32_cvtne2ps2bf16_v8bf", IX86_BUILTIN_CVTNE2PS2BF16_V8BF, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_mask, "__builtin_ia32_cvtne2ps2bf16_v8bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASK, UNKNOWN, (int) 
V8BF_FTYPE_V4SF_V4SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v8bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2BF16_V8SF, UNKNOWN, (int) V8BF_FTYPE_V8SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V8SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V8SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2BF16_V4SF, UNKNOWN, (int) V8BF_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V4SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V4SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) +BDESC (0, 
OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPBF16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPBF16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPBF16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI) @@ -2885,40 +2885,40 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_extendbfsf2_1, "__builtin_ia32_cvtbf2sf /* AVX512FP16. */ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", 
IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) @@ -2928,7 +2928,7 @@ BDESC 
(OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) @@ -3366,26 +3366,26 @@ BDESC_END (ARGS, ROUND_ARGS) /* AVX512F. */ BDESC_FIRST (round_args, ROUND_ARGS, - OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) + OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, 
(int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, 
"__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT) @@ -3393,72 +3393,72 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) 
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd_mask_round", IX86_BUILTIN_GETEXPSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss_mask_round", IX86_BUILTIN_GETEXPSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_mask_round, "__builtin_ia32_getmantsd_mask_round", IX86_BUILTIN_GETMANTSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_mask_round, "__builtin_ia32_getmantss_mask_round", IX86_BUILTIN_GETMANTSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_round, 
"__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 
0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) @@ -3479,12 +3479,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_sse_cvttss2si_round, "__built BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", 
IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) @@ -3495,100 +3495,100 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, 
(int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, 
(int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC 
(OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) /* AVX512DQ. */ -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, 
CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT) /* AVX512FP16. */ -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | 
OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", 
IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, 
"__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) 
INT64_FTYPE_V8HF_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) @@ -3601,32 +3601,32 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) 
V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) 
V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, 
"__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) @@ -3634,18 +3634,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, 
"__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, 
"__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fma_fcmaddcsh_v8hf_round, "__builtin_ia32_vfcmaddcsh_round", IX86_BUILTIN_VFCMADDCSH_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask1_round, "__builtin_ia32_vfcmaddcsh_mask_round", IX86_BUILTIN_VFCMADDCSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask3_round, "__builtin_ia32_vfcmaddcsh_mask3_round", IX86_BUILTIN_VFCMADDCSH_MASK3_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 2e7381b..4835b94 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -801,102 +801,102 @@ ix86_init_mmx_sse_builtins (void) IX86_BUILTIN_GATHERALTDIV8SI); /* AVX512F */ - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, IX86_BUILTIN_GATHER3SIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, IX86_BUILTIN_GATHER3SIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, IX86_BUILTIN_GATHER3SIV16SI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, IX86_BUILTIN_GATHER3SIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV16SI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, 
"__builtin_ia32_gather3altdiv16sf ", V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, IX86_BUILTIN_SCATTERSIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, IX86_BUILTIN_SCATTERSIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, IX86_BUILTIN_SCATTERDIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, IX86_BUILTIN_SCATTERDIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, IX86_BUILTIN_SCATTERSIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, IX86_BUILTIN_SCATTERSIV8DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, IX86_BUILTIN_SCATTERDIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, IX86_BUILTIN_SCATTERDIV8DI); @@ -1046,22 +1046,22 @@ ix86_init_mmx_sse_builtins (void) VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, IX86_BUILTIN_SCATTERDIV2DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, IX86_BUILTIN_SCATTERALTSIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, IX86_BUILTIN_SCATTERALTDIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, IX86_BUILTIN_SCATTERALTSIV8DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, IX86_BUILTIN_SCATTERALTDIV16SI); @@ -1676,7 +1676,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, enum ix86_builtins code; const machine_mode mode = TYPE_MODE (TREE_TYPE (mem_vectype)); - if ((!TARGET_AVX512F || !TARGET_EVEX512) && GET_MODE_SIZE (mode) == 64) + if (!TARGET_AVX512F && GET_MODE_SIZE (mode) == 64) return NULL_TREE; if (! 
TARGET_AVX2 diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 0a320ca..457aa05 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -729,12 +729,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__SHA512__"); if (isa_flag2 & OPTION_MASK_ISA2_SM4) def_or_undef (parse_in, "__SM4__"); - if (isa_flag2 & OPTION_MASK_ISA2_EVEX512) - def_or_undef (parse_in, "__EVEX512__"); if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR) def_or_undef (parse_in, "__USER_MSR__"); - if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256) - def_or_undef (parse_in, "__AVX10_1_256__"); if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1) def_or_undef (parse_in, "__AVX10_1__"); if (isa_flag2 & OPTION_MASK_ISA2_APX_F) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..83076ad 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) too common scenario. */ start_sequence (); compare_op = ix86_expand_fp_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); @@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[]) start_sequence (); compare_op = ix86_expand_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); compare_code = GET_CODE (compare_op); @@ -3611,7 +3609,11 @@ ix86_expand_int_movcc (rtx operands[]) negate_cc_compare_p = true; } - diff = ct - cf; + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; + /* Sign bit compares are better done using shifts than we do by using sbb. */ if (sign_bit_compare_p @@ -3669,7 +3671,12 @@ ix86_expand_int_movcc (rtx operands[]) PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); } - diff = ct - cf; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; if (reg_overlap_mentioned_p (out, compare_op)) tmp = gen_reg_rtx (mode); @@ -3687,7 +3694,12 @@ ix86_expand_int_movcc (rtx operands[]) else { std::swap (ct, cf); - diff = ct - cf; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; } tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); } @@ -3754,9 +3766,15 @@ ix86_expand_int_movcc (rtx operands[]) tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); } + HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct; + /* Make sure we can represent the difference + between the two values. */ + if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf)) + return false; + tmp = expand_simple_binop (mode, AND, copy_rtx (tmp), - gen_int_mode (cf - ct, mode), + gen_int_mode (ival, mode), copy_rtx (tmp), 1, OPTAB_DIRECT); if (ct) tmp = expand_simple_binop (mode, PLUS, @@ -3793,7 +3811,13 @@ ix86_expand_int_movcc (rtx operands[]) if (new_code != UNKNOWN) { std::swap (ct, cf); - diff = -diff; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. 
*/ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; + code = new_code; } } @@ -3996,8 +4020,14 @@ ix86_expand_int_movcc (rtx operands[]) copy_rtx (out), 1, OPTAB_DIRECT); } + HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct; + /* Make sure we can represent the difference + between the two values. */ + if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf)) + return false; + out = expand_simple_binop (mode, AND, copy_rtx (out), - gen_int_mode (cf - ct, mode), + gen_int_mode (ival, mode), copy_rtx (out), 1, OPTAB_DIRECT); if (ct) out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), @@ -4138,6 +4168,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return false; mode = GET_MODE (dest); + if (immediate_operand (if_false, mode)) + if_false = force_reg (mode, if_false); + if (immediate_operand (if_true, mode)) + if_true = force_reg (mode, if_true); /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. */ @@ -4186,7 +4220,7 @@ ix86_valid_mask_cmp_mode (machine_mode mode) if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW) return false; - return (vector_size == 64 && TARGET_EVEX512) || TARGET_AVX512VL; + return vector_size == 64 || TARGET_AVX512VL; } /* Return true if integer mask comparison should be used. */ @@ -4687,6 +4721,8 @@ ix86_expand_fp_movcc (rtx operands[]) compare_op = ix86_expand_compare (NE, tmp, const0_rtx); } + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (mode, compare_op, operands[2], operands[3]))); @@ -5022,7 +5058,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 /* Don't do it if not using integer masks and we'd end up with the right values in the registers though. */ - && ((GET_MODE_SIZE (mode) == 64 && TARGET_EVEX512) + && (GET_MODE_SIZE (mode) == 64 || !vector_all_ones_operand (optrue, data_mode) || opfalse != CONST0_RTX (data_mode)))) { @@ -8901,31 +8937,34 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, /* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool -alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) +alg_usable_p (enum stringop_alg alg, bool memset, + addr_space_t dst_as, addr_space_t src_as) { if (alg == no_stringop) return false; /* It is not possible to use a library call if we have non-default address space. We can do better than the generic byte-at-a-time loop, used as a fallback. */ - if (alg == libcall && have_as) + if (alg == libcall && + !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as))) return false; if (alg == vector_loop) return TARGET_SSE || TARGET_AVX; /* Algorithms using the rep prefix want at least edi and ecx; additionally, memset wants eax and memcpy wants esi. Don't consider such algorithms if the user has appropriated those - registers for their own purposes, or if we have a non-default - address space, since some string insns cannot override the segment. */ + registers for their own purposes, or if we have the destination + in the non-default address space, since string insns cannot + override the destination segment. 
*/ if (alg == rep_prefix_1_byte || alg == rep_prefix_4_byte || alg == rep_prefix_8_byte) { - if (have_as) - return false; if (fixed_regs[CX_REG] || fixed_regs[DI_REG] - || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) + || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]) + || !ADDR_SPACE_GENERIC_P (dst_as) + || !(ADDR_SPACE_GENERIC_P (src_as) || Pmode == word_mode)) return false; } return true; @@ -8935,8 +8974,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) static enum stringop_alg decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - bool memset, bool zero_memset, bool have_as, - int *dynamic_check, bool *noalign, bool recur) + bool memset, bool zero_memset, addr_space_t dst_as, + addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur) { const struct stringop_algs *algs; bool optimize_for_speed; @@ -8968,7 +9007,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, for (i = 0; i < MAX_STRINGOP_ALGS; i++) { enum stringop_alg candidate = algs->size[i].alg; - bool usable = alg_usable_p (candidate, memset, have_as); + bool usable = alg_usable_p (candidate, memset, dst_as, src_as); any_alg_usable_p |= usable; if (candidate != libcall && candidate && usable) @@ -8984,17 +9023,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If user specified the algorithm, honor it if possible. */ if (ix86_stringop_alg != no_stringop - && alg_usable_p (ix86_stringop_alg, memset, have_as)) + && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as)) return ix86_stringop_alg; /* rep; movq or rep; movl is the smallest variant. */ else if (!optimize_for_speed) { *noalign = true; if (!count || (count & 3) || (memset && !zero_memset)) - return alg_usable_p (rep_prefix_1_byte, memset, have_as) + return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as) ? rep_prefix_1_byte : loop_1_byte; else - return alg_usable_p (rep_prefix_4_byte, memset, have_as) + return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as) ? rep_prefix_4_byte : loop; } /* Very tiny blocks are best handled via the loop, REP is expensive to @@ -9018,7 +9057,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, enum stringop_alg candidate = algs->size[i].alg; if (candidate != libcall - && alg_usable_p (candidate, memset, have_as)) + && alg_usable_p (candidate, memset, dst_as, src_as)) { alg = candidate; alg_noalign = algs->size[i].noalign; @@ -9038,7 +9077,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, else if (!any_alg_usable_p) break; } - else if (alg_usable_p (candidate, memset, have_as) + else if (alg_usable_p (candidate, memset, dst_as, src_as) && !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB && candidate == rep_prefix_1_byte /* NB: If min_size != max_size, size is @@ -9060,7 +9099,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, choice in ix86_costs. */ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) && (algs->unknown_size == libcall - || !alg_usable_p (algs->unknown_size, memset, have_as))) + || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as))) { enum stringop_alg alg; HOST_WIDE_INT new_expected_size = (max > 0 ? 
max : 4096) / 2; @@ -9075,8 +9114,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - alg = decide_alg (count, new_expected_size, min_size, max_size, memset, - zero_memset, have_as, dynamic_check, noalign, true); + alg = decide_alg (count, new_expected_size, min_size, max_size, + memset, zero_memset, dst_as, src_as, + dynamic_check, noalign, true); gcc_assert (*dynamic_check == -1); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; @@ -9088,7 +9128,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* Try to use some reasonable fallback algorithm. Note that for non-default address spaces we default to a loop instead of a libcall. */ - return (alg_usable_p (algs->unknown_size, memset, have_as) + + bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as) + && ADDR_SPACE_GENERIC_P (src_as)); + + return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as) ? algs->unknown_size : have_as ? loop : libcall); } @@ -9307,14 +9351,13 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, bool need_zero_guard = false; bool noalign; machine_mode move_mode = VOIDmode; - machine_mode wider_mode; int unroll_factor = 1; /* TODO: Once value ranges are available, fill in proper data. */ unsigned HOST_WIDE_INT min_size = 0; unsigned HOST_WIDE_INT max_size = -1; unsigned HOST_WIDE_INT probable_max_size = -1; bool misaligned_prologue_used = false; - bool have_as; + addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC; if (CONST_INT_P (align_exp)) align = INTVAL (align_exp); @@ -9352,16 +9395,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, if (count > (HOST_WIDE_INT_1U << 30)) return false; - have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); + dst_as = MEM_ADDR_SPACE (dst); if (!issetmem) - have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); + src_as = MEM_ADDR_SPACE (src); /* Step 0: Decide on preferred algorithm, desired alignment and size of chunks to be copied by main loop. */ alg = decide_alg (count, expected_size, min_size, probable_max_size, - issetmem, - issetmem && val_exp == const0_rtx, have_as, - &dynamic_check, &noalign, false); + issetmem, issetmem && val_exp == const0_rtx, + dst_as, src_as, &dynamic_check, &noalign, false); if (dump_file) fprintf (dump_file, "Selected stringop expansion strategy: %s\n", @@ -9384,6 +9426,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, unroll_factor = 1; move_mode = word_mode; + int nunits; switch (alg) { case libcall: @@ -9404,27 +9447,14 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, case vector_loop: need_zero_guard = true; unroll_factor = 4; - /* Find the widest supported mode. */ - move_mode = word_mode; - while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) - && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) - move_mode = wider_mode; - - if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128) - move_mode = TImode; - if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256) - move_mode = OImode; - - /* Find the corresponding vector mode with the same size as MOVE_MODE. - MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ - if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) + /* Get the vector mode to move MOVE_MAX bytes. 
*/ + nunits = MOVE_MAX / GET_MODE_SIZE (word_mode); + if (nunits > 1) { - int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); - if (!mode_for_vector (word_mode, nunits).exists (&move_mode) - || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) - move_mode = word_mode; + move_mode = mode_for_vector (word_mode, nunits).require (); + gcc_assert (optab_handler (mov_optab, move_mode) + != CODE_FOR_nothing); } - gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); break; case rep_prefix_8_byte: move_mode = DImode; @@ -10108,9 +10138,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, if (lookup_attribute ("interrupt", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) error ("interrupt service routine cannot be called directly"); - else if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + else if (ix86_type_no_callee_saved_registers_p (TREE_TYPE (fndecl))) call_no_callee_saved_registers = true; + if (fndecl == current_function_decl + && decl_binds_to_current_def_p (fndecl)) + cfun->machine->recursive_function = true; } } else @@ -10120,8 +10152,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, tree mem_expr = MEM_EXPR (fnaddr); if (mem_expr != nullptr && TREE_CODE (mem_expr) == MEM_REF - && lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (mem_expr)))) + && ix86_type_no_callee_saved_registers_p (TREE_TYPE (mem_expr))) call_no_callee_saved_registers = true; } @@ -10346,6 +10377,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (!fixed_regs[i] + && i != HARD_FRAME_POINTER_REGNUM && !(ix86_call_used_regs[i] == 1 || (ix86_call_used_regs[i] & c_mask)) && !STACK_REGNO_P (i) @@ -11244,6 +11276,54 @@ fixup_modeless_constant (rtx x, machine_mode mode) return x; } +/* Expand the outgoing argument ARG to extract unsigned char and short + integer constants suitable for the predicates and the instruction + templates which expect the unsigned expanded value. */ + +static rtx +ix86_expand_unsigned_small_int_cst_argument (tree arg) +{ + /* When passing 0xff as an unsigned char function argument with the + C frontend promotion, expand_normal gets + + <integer_cst 0x7fffe6aa23a8 type <integer_type 0x7fffe98225e8 int> constant 255> + + and returns the rtx value using the sign-extended representation: + + (const_int 255 [0xff]) + + Without the C frontend promotion, expand_normal gets + + <integer_cst 0x7fffe9824018 type <integer_type 0x7fffe9822348 unsigned char > constant 255> + + and returns + + (const_int -1 [0xffffffffffffffff]) + + which doesn't work with the predicates nor the instruction templates + which expect the unsigned expanded value. Extract the unsigned char + and short integer constants to return + + (const_int 255 [0xff]) + + so that the expanded value is always unsigned, without the C frontend + promotion. */ + + if (TREE_CODE (arg) == INTEGER_CST) + { + tree type = TREE_TYPE (arg); + if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (type) + && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) + { + HOST_WIDE_INT cst = TREE_INT_CST_LOW (arg); + return GEN_INT (cst); + } + } + + return expand_normal (arg); +} + /* Subroutine of ix86_expand_builtin to take care of insns with variable number of operands. 
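/* The helper above exists because RTL const_ints are kept sign-extended:
   without the C promotion, an unsigned char 255 expands to (const_int -1),
   while the insn predicates want the zero-extended 255.  A plain-C analogy
   of the two representations (not GCC code, just the extension behaviour):  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t cst = 0xff;
  long long sign_extended = (int8_t) cst;	/* -1: the rejected form */
  long long zero_extended = cst;		/* 255: what the templates expect */
  printf ("%lld vs %lld\n", sign_extended, zero_extended);
  return 0;
}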
*/ @@ -12142,7 +12222,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, for (i = 0; i < nargs; i++) { tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); + rtx op = ix86_expand_unsigned_small_int_cst_argument (arg); machine_mode mode = insn_p->operand[i + 1].mode; /* Need to fixup modeless constant before testing predicate. */ op = fixup_modeless_constant (op, mode); @@ -12837,7 +12917,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, for (i = 0; i < nargs; i++) { tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); + rtx op = ix86_expand_unsigned_small_int_cst_argument (arg); machine_mode mode = insn_p->operand[i + 1].mode; bool match = insn_p->operand[i + 1].predicate (op, mode); @@ -13322,7 +13402,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, machine_mode mode = insn_p->operand[i + 1].mode; arg = CALL_EXPR_ARG (exp, i + arg_adjust); - op = expand_normal (arg); + op = ix86_expand_unsigned_small_int_cst_argument (arg); if (i == memory) { @@ -15466,7 +15546,7 @@ rdseed_step: op0 = expand_normal (arg0); op1 = expand_normal (arg1); op2 = expand_normal (arg2); - op3 = expand_normal (arg3); + op3 = ix86_expand_unsigned_small_int_cst_argument (arg3); op4 = expand_normal (arg4); /* Note the arg order is different from the operand order. */ mode0 = insn_data[icode].operand[1].mode; @@ -15681,7 +15761,7 @@ rdseed_step: arg3 = CALL_EXPR_ARG (exp, 3); arg4 = CALL_EXPR_ARG (exp, 4); op0 = expand_normal (arg0); - op1 = expand_normal (arg1); + op1 = ix86_expand_unsigned_small_int_cst_argument (arg1); op2 = expand_normal (arg2); op3 = expand_normal (arg3); op4 = expand_normal (arg4); @@ -16130,7 +16210,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, { case VEC_BCAST_PXOR: if ((mode == V8SImode && !TARGET_AVX2) - || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + || (mode == V16SImode && !TARGET_AVX512F)) return false; emit_move_insn (target, CONST0_RTX (mode)); return true; @@ -16138,7 +16218,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, case VEC_BCAST_PCMPEQ: if ((mode == V4SImode && !TARGET_SSE2) || (mode == V8SImode && !TARGET_AVX2) - || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + || (mode == V16SImode && !TARGET_AVX512F)) return false; emit_move_insn (target, CONSTM1_RTX (mode)); return true; @@ -16158,7 +16238,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V32QImode); emit_insn (gen_absv32qi2 (tmp2, tmp1)); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V64QImode); emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); @@ -16184,7 +16264,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V32QImode); emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1)); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V64QImode); emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); @@ -16210,7 +16290,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V16HImode); emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V32HImode); emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); @@ -16236,7 +16316,7 @@ 
ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg))); return true; } - else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512F) { tmp1 = gen_reg_rtx (V16SImode); emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); @@ -16262,7 +16342,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V16HImode); emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V32HImode); emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); @@ -16288,7 +16368,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg))); return true; } - else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512F) { tmp1 = gen_reg_rtx (V16SImode); emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); @@ -16342,8 +16422,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) if (GET_MODE (reg) != innermode) reg = gen_lowpart (innermode, reg); SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (seq) emit_insn_before (seq, insn); @@ -16659,7 +16738,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, case E_V32HFmode: case E_V32BFmode: - gcc_assert (TARGET_EVEX512); if (TARGET_AVX512BW) return ix86_vector_duplicate_value (mode, target, val); else @@ -16712,9 +16790,6 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, bool use_vector_set = false; rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; - if (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512) - return false; - switch (mode) { case E_V2DImode: @@ -18670,6 +18745,33 @@ emit_reduc_half (rtx dest, rtx src, int i) case E_V8HFmode: case E_V4SImode: case E_V2DImode: + if (TARGET_SSE_REDUCTION_PREFER_PSHUF) + { + if (i == 128) + { + d = gen_reg_rtx (V4SImode); + tem = gen_sse2_pshufd_1 ( + d, force_reg (V4SImode, gen_lowpart (V4SImode, src)), + GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3)); + break; + } + else if (i == 64) + { + d = gen_reg_rtx (V4SImode); + tem = gen_sse2_pshufd_1 ( + d, force_reg (V4SImode, gen_lowpart (V4SImode, src)), + GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1)); + break; + } + else if (i == 32) + { + d = gen_reg_rtx (V8HImode); + tem = gen_sse2_pshuflw_1 ( + d, force_reg (V8HImode, gen_lowpart (V8HImode, src)), + GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1)); + break; + } + } d = gen_reg_rtx (V1TImode); tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), GEN_INT (i / 2)); @@ -19256,8 +19358,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19370,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. 
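/* The two-FMA refinement named in the comment above, as a stand-alone
   scalar sketch (plain C using SSE's reciprocal estimate; the patch itself
   emits the equivalent vector RTL):
     x0  = rcp (b)
     e0  = x0 * a
     e1  = fma (e0, b, -a)	residual of the estimated quotient
     res = fma (-e1, x0, e0)	one Newton-Raphson correction step  */
#include <immintrin.h>
#include <math.h>
#include <stdio.h>

static float
approx_div (float a, float b)
{
  float x0 = _mm_cvtss_f32 (_mm_rcp_ss (_mm_set_ss (b)));  /* ~12-bit estimate */
  float e0 = x0 * a;
  float e1 = fmaf (e0, b, -a);
  return fmaf (-e1, x0, e0);
}

int
main (void)
{
  printf ("%.9g vs %.9g\n", approx_div (355.0f, 113.0f), 355.0f / 113.0f);
  return 0;
}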
*/ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a @@ -19356,7 +19478,7 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) unsigned vector_size = GET_MODE_SIZE (mode); if (TARGET_FMA - || (TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) + || (TARGET_AVX512F && vector_size == 64) || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, e0, x0, mthree))); @@ -22018,8 +22140,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) V4SImode this *will* succeed. For V8HImode or V16QImode it may not. 
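/* The TARGET_SSE_REDUCTION_PREFER_PSHUF path added to emit_reduc_half in
   this hunk folds the upper half onto the lower half with pshufd/pshuflw
   instead of a whole-register shift.  A runnable intrinsics sketch of the
   same reduction-by-halves pattern, here for a horizontal add of four ints
   (an illustration, not the generated RTL):  */
#include <immintrin.h>

static int
hadd_epi32 (__m128i v)
{
  /* Bring elements 2,3 down onto 0,1 (pshufd with selector 2,3,2,3).  */
  __m128i hi64 = _mm_shuffle_epi32 (v, _MM_SHUFFLE (3, 2, 3, 2));
  __m128i sum2 = _mm_add_epi32 (v, hi64);
  /* Bring element 1 down onto 0 (pshufd with selector 1,1,1,1).  */
  __m128i hi32 = _mm_shuffle_epi32 (sum2, _MM_SHUFFLE (1, 1, 1, 1));
  return _mm_cvtsi128_si32 (_mm_add_epi32 (sum2, hi32));
}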
*/ start_sequence (); ok = expand_vec_perm_1 (&dfinal); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22355,8 +22476,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22464,8 +22584,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22475,8 +22594,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22590,8 +22708,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22601,8 +22718,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22796,8 +22912,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dfirst); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22805,8 +22920,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dsecond); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -24290,9 +24404,6 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, unsigned int i, nelt, which; bool two_args; - if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512) - return false; - /* For HF and BF mode vector, convert it to HI using subreg. */ if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode) { @@ -24834,7 +24945,6 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2) ix86_expand_vecop_qihi. */ if (!TARGET_AVX512BW || (qimode == V16QImode && !TARGET_AVX512VL) - || (qimode == V32QImode && !TARGET_EVEX512) /* There are no V64HImode instructions. 
*/ || qimode == V64QImode) return false; @@ -25303,7 +25413,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) machine_mode mode = GET_MODE (op0); rtx t1, t2, t3, t4, t5, t6; - if (TARGET_AVX512DQ && TARGET_EVEX512 && mode == V8DImode) + if (TARGET_AVX512DQ && mode == V8DImode) emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); @@ -26033,8 +26143,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, } } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); start_sequence (); @@ -26045,8 +26154,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, end_sequence (); return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return res; } @@ -26089,8 +26197,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); target = gen_rtx_REG (cc_mode, FLAGS_REG); dfv = ix86_get_flags_cc ((rtx_code) cmp_code); @@ -26121,8 +26228,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx); } @@ -26136,8 +26242,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x) { if (!TARGET_AVX512F || !CONST_VECTOR_P (x) - || (!TARGET_AVX512VL - && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512)) + || (!TARGET_AVX512VL && GET_MODE_SIZE (mode) != 64) || !VALID_BCST_MODE_P (GET_MODE_INNER (mode)) /* Disallow HFmode broadcast. */ || GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index c35ac24..054f8d5 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) insns_conv = BITMAP_ALLOC (NULL); queue = NULL; - n_sse_to_integer = 0; - n_integer_to_sse = 0; - + cost_sse_integer = 0; + weighted_cost_sse_integer = 0 ; max_visits = x86_stv_max_visits; } @@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def) /* Record the def/insn pair so we can later efficiently iterate over the defs to convert on insns not in the chain. */ bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); + basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + int cost = 0; + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def))) { if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def)) && !reg_new) return; - n_integer_to_sse++; + + /* Cost integer to sse moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpinsrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ + else + cost = COSTS_N_BYTES (12); } else { if (!reg_new) return; - n_sse_to_integer++; + + /* Cost sse to integer moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpextrd (6 bytes). 
*/ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ + else + cost = COSTS_N_BYTES (13); } + if (speed_p) + weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost; + + cost_sse_integer += cost; + if (dump_file) fprintf (dump_file, " Mark r%d def in insn %d as requiring both modes in chain #%d\n", @@ -518,26 +549,28 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed) instead of using a scalar one. */ int -general_scalar_chain::vector_const_cost (rtx exp) +general_scalar_chain::vector_const_cost (rtx exp, basic_block bb) { gcc_assert (CONST_INT_P (exp)); if (standard_sse_constant_p (exp, vmode)) return ix86_cost->sse_op; + if (optimize_bb_for_size_p (bb)) + return COSTS_N_BYTES (8); /* We have separate costs for SImode and DImode, use SImode costs for smaller modes. */ - return ix86_cost->sse_load[smode == DImode ? 1 : 0]; + return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2; } -/* Compute a gain for chain conversion. */ +/* Return true if it's cost profitable for chain conversion. */ -int +bool general_scalar_chain::compute_convert_gain () { bitmap_iterator bi; unsigned insn_uid; int gain = 0; - int cost = 0; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -547,7 +580,7 @@ general_scalar_chain::compute_convert_gain () smaller modes than SImode the int load/store costs need to be adjusted as well. */ unsigned sse_cost_idx = smode == DImode ? 1 : 0; - unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; + int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) { @@ -555,26 +588,58 @@ general_scalar_chain::compute_convert_gain () rtx def_set = single_set (insn); rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); + basic_block bb = BLOCK_FOR_INSN (insn); int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); if (REG_P (src) && REG_P (dst)) - igain += 2 * m - ix86_cost->xmm_move; + { + if (!speed_p) + /* reg-reg move is 2 bytes, while SSE 3. */ + igain += COSTS_N_BYTES (2 * m - 3); + else + /* Move costs are normalized to reg-reg move having cost 2. */ + igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2; + } else if (REG_P (src) && MEM_P (dst)) - igain - += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; + { + if (!speed_p) + /* Integer load/store is 3+ bytes and SSE 4+. */ + igain += COSTS_N_BYTES (3 * m - 4); + else + igain + += COSTS_N_INSNS (m * ix86_cost->int_store[2] + - ix86_cost->sse_store[sse_cost_idx]) / 2; + } else if (MEM_P (src) && REG_P (dst)) - igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx]; + { + if (!speed_p) + igain += COSTS_N_BYTES (3 * m - 4); + else + igain += COSTS_N_INSNS (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; + } else { /* For operations on memory operands, include the overhead of explicit load and store instructions. */ if (MEM_P (dst)) - igain += optimize_insn_for_size_p () - ? -COSTS_N_BYTES (8) - : (m * (ix86_cost->int_load[2] - + ix86_cost->int_store[2]) - - (ix86_cost->sse_load[sse_cost_idx] + - ix86_cost->sse_store[sse_cost_idx])); + { + if (!speed_p) + /* ??? This probably should account size difference + of SSE and integer load rather than full SSE load. 
*/ + igain -= COSTS_N_BYTES (8); + else + { + int cost = (m * (ix86_cost->int_load[2] + + ix86_cost->int_store[2]) + - (ix86_cost->sse_load[sse_cost_idx] + + ix86_cost->sse_store[sse_cost_idx])); + igain += COSTS_N_INSNS (cost) / 2; + } + } switch (GET_CODE (src)) { @@ -595,7 +660,7 @@ general_scalar_chain::compute_convert_gain () igain += ix86_cost->shift_const - ix86_cost->sse_op; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); break; case ROTATE: @@ -631,16 +696,17 @@ general_scalar_chain::compute_convert_gain () igain += m * ix86_cost->add; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); if (CONST_INT_P (XEXP (src, 1))) - igain -= vector_const_cost (XEXP (src, 1)); + igain -= vector_const_cost (XEXP (src, 1), bb); if (MEM_P (XEXP (src, 1))) { - if (optimize_insn_for_size_p ()) + if (!speed_p) igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); else - igain += m * ix86_cost->int_load[2] - - ix86_cost->sse_load[sse_cost_idx]; + igain += COSTS_N_INSNS + (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; } break; @@ -698,7 +764,7 @@ general_scalar_chain::compute_convert_gain () case CONST_INT: if (REG_P (dst)) { - if (optimize_insn_for_size_p ()) + if (!speed_p) { /* xor (2 bytes) vs. xorps (3 bytes). */ if (src == const0_rtx) @@ -722,14 +788,14 @@ general_scalar_chain::compute_convert_gain () /* DImode can be immediate for TARGET_64BIT and SImode always. */ igain += m * COSTS_N_INSNS (1); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } } else if (MEM_P (dst)) { igain += (m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } break; @@ -737,13 +803,14 @@ general_scalar_chain::compute_convert_gain () if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) { // movd (4 bytes) replaced with movdqa (4 bytes). - if (!optimize_insn_for_size_p ()) - igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move; + if (!!speed_p) + igain += COSTS_N_INSNS (ix86_cost->sse_to_integer + - ix86_cost->xmm_move) / 2; } else { // pshufd; movd replaced with pshufd. - if (optimize_insn_for_size_p ()) + if (!speed_p) igain += COSTS_N_BYTES (4); else igain += ix86_cost->sse_to_integer; @@ -755,55 +822,34 @@ general_scalar_chain::compute_convert_gain () } } + if (speed_p) + weighted_gain += bb_freq * igain; + gain += igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Instruction conversion gain: %d\n", gain); - - /* Cost the integer to sse and sse to integer moves. */ - if (!optimize_function_for_size_p (cfun)) - { - cost += n_sse_to_integer * ix86_cost->sse_to_integer; - /* ??? integer_to_sse but we only have that in the RA cost table. - Assume sse_to_integer/integer_to_sse are the same which they - are at the moment. */ - cost += n_integer_to_sse * ix86_cost->sse_to_integer; - } - else if (TARGET_64BIT || smode == SImode) - { - cost += n_sse_to_integer * COSTS_N_BYTES (4); - cost += n_integer_to_sse * COSTS_N_BYTES (4); - } - else if (TARGET_SSE4_1) - { - /* vmovd (4 bytes) + vpextrd (6 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (10); - /* vmovd (4 bytes) + vpinsrd (6 bytes). 
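/* The chain gain computation above now weights each instruction's gain by
   its block's execution frequency relative to the function entry.  A toy
   model of the resulting profitability test (double instead of sreal, a
   flat array instead of the chain walk; names are illustrative):  */
#include <stdbool.h>
#include <stddef.h>

struct insn_gain { double bb_freq; int igain; int conv_cost; };

static bool
chain_profitable_p (const struct insn_gain *g, size_t n)
{
  double weighted_gain = 0.0, weighted_cost = 0.0;
  int gain = 0, cost = 0;
  for (size_t i = 0; i < n; i++)
    {
      weighted_gain += g[i].bb_freq * g[i].igain;
      weighted_cost += g[i].bb_freq * g[i].conv_cost;
      gain += g[i].igain;
      cost += g[i].conv_cost;
    }
  if (weighted_gain != weighted_cost)
    return weighted_gain > weighted_cost;
  return gain > cost;	/* unweighted totals only break ties */
}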
*/ - cost += n_integer_to_sse * COSTS_N_BYTES (10); - } - else { - /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (13); - /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (12); + fprintf (dump_file, " Instruction conversion gain: %d, \n", + gain); + fprintf (dump_file, " Registers conversion cost: %d\n", + cost_sse_integer); + fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n", + weighted_gain.to_double ()); + fprintf (dump_file, " Weighted registers conversion cost: %.2f\n", + weighted_cost_sse_integer.to_double ()); } - if (dump_file) - fprintf (dump_file, " Registers conversion cost: %d\n", cost); - - gain -= cost; - - if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); - - return gain; + if (weighted_gain != weighted_cost_sse_integer) + return weighted_gain > weighted_cost_sse_integer; + else + return gain > cost_sse_integer;; } /* Insert generated conversion instruction sequence INSNS @@ -902,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg) else emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0), gen_gpr_to_xmm_move_src (vmode, reg))); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -970,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src) else emit_move_insn (dst, src); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -1066,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) { start_sequence (); vec_cst = validize_mem (force_const_mem (vmode, vec_cst)); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_insn_before (seq, insn); } @@ -1508,33 +1551,34 @@ general_scalar_chain::convert_insn (rtx_insn *insn) with numerous special cases. */ static int -timode_immed_const_gain (rtx cst) +timode_immed_const_gain (rtx cst, basic_block bb) { /* movabsq vs. movabsq+vmovq+vunpacklqdq. */ if (CONST_WIDE_INT_P (cst) && CONST_WIDE_INT_NUNITS (cst) == 2 && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1)) - return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9) + return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9) : -COSTS_N_INSNS (2); /* 2x movabsq ~ vmovdqa. */ return 0; } -/* Compute a gain for chain conversion. */ +/* Return true it's cost profitable for for chain conversion. */ -int +bool timode_scalar_chain::compute_convert_gain () { /* Assume that if we have to move TImode values between units, then transforming this chain isn't worth it. */ - if (n_sse_to_integer || n_integer_to_sse) - return -1; + if (cost_sse_integer) + return false; bitmap_iterator bi; unsigned insn_uid; /* Split ties to prefer V1TImode when not optimizing for size. */ int gain = optimize_size ? 
0 : 1; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -1546,34 +1590,36 @@ timode_scalar_chain::compute_convert_gain () rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); HOST_WIDE_INT op1val; + basic_block bb = BLOCK_FOR_INSN (insn); int scost, vcost; int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); switch (GET_CODE (src)) { case REG: - if (optimize_insn_for_size_p ()) + if (!speed_p) igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3); else igain = COSTS_N_INSNS (1); break; case MEM: - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1); break; case CONST_INT: if (MEM_P (dst) && standard_sse_constant_p (src, V1TImode)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1; + igain = !speed_p ? COSTS_N_BYTES (11) : 1; break; case CONST_WIDE_INT: /* 2 x mov vs. vmovdqa. */ if (MEM_P (dst)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); break; case NOT: @@ -1587,14 +1633,14 @@ timode_scalar_chain::compute_convert_gain () if (!MEM_P (dst)) igain = COSTS_N_INSNS (1); if (CONST_SCALAR_INT_P (XEXP (src, 1))) - igain += timode_immed_const_gain (XEXP (src, 1)); + igain += timode_immed_const_gain (XEXP (src, 1), bb); break; case ASHIFT: case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { if (op1val == 64 || op1val == 65) scost = COSTS_N_BYTES (5); @@ -1628,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { if (op1val == 64 || op1val == 127) scost = COSTS_N_BYTES (7); @@ -1706,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATERT: /* See ix86_expand_v1ti_rotate. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { scost = COSTS_N_BYTES (13); if ((op1val & 31) == 0) @@ -1738,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain () { if (GET_CODE (XEXP (src, 0)) == AND) /* and;and;or (9 bytes) vs. ptest (5 bytes). */ - igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (2); + igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2); /* or (3 bytes) vs. ptest (5 bytes). */ - else if (optimize_insn_for_size_p ()) + else if (!speed_p) igain = -COSTS_N_BYTES (2); } else if (XEXP (src, 1) == const1_rtx) /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ - igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6) - : -COSTS_N_INSNS (1); + igain = !speed_p ? 
-COSTS_N_BYTES (6) : -COSTS_N_INSNS (1); break; default: break; } + gain += igain; + if (speed_p) + weighted_gain += bb_freq * igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); + fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n", + gain, weighted_gain.to_double ()); - return gain; + if (weighted_gain > (sreal) 0) + return true; + else + return gain > 0; } /* Fix uses of converted REG in debug insns. */ @@ -1874,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) src = validize_mem (force_const_mem (V1TImode, src)); use_move = MEM_P (dst); } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (seq) emit_insn_before (seq, insn); if (use_move) @@ -2561,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p) conversions. */ if (chain->build (&candidates[i], uid, disallowed)) { - if (chain->compute_convert_gain () > 0) + if (chain->compute_convert_gain ()) converted_insns += chain->convert (); else if (dump_file) fprintf (dump_file, "Chain #%d conversion is not profitable\n", @@ -3034,6 +3085,82 @@ ix86_rpad_gate () && optimize_function_for_speed_p (cfun)); } +/* Generate a vector set, DEST = SRC, at entry of the nearest dominator + for basic block map BBS, which is in the fake loop that contains the + whole function, so that there is only a single vector set in the + whole function. If not nullptr, INNER_SCALAR is the inner scalar of + SRC, as (reg:SI 99) in (vec_duplicate:V4SI (reg:SI 99)). */ + +static void +ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, + rtx inner_scalar = nullptr) +{ + basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); + while (bb->loop_father->latch + != EXIT_BLOCK_PTR_FOR_FN (cfun)) + bb = get_immediate_dominator (CDI_DOMINATORS, + bb->loop_father->header); + + rtx set = gen_rtx_SET (dest, src); + + rtx_insn *insn = BB_HEAD (bb); + while (insn && !NONDEBUG_INSN_P (insn)) + { + if (insn == BB_END (bb)) + { + insn = NULL; + break; + } + insn = NEXT_INSN (insn); + } + + rtx_insn *set_insn; + if (insn == BB_HEAD (bb)) + { + set_insn = emit_insn_before (set, insn); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + } + else + { + rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb); + set_insn = emit_insn_after (set, after); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, after); + fprintf (dump_file, "\n"); + } + } + + if (inner_scalar) + { + /* Set the source in (vec_duplicate:V4SI (reg:SI 99)). 
*/ + rtx reg = XEXP (src, 0); + if ((REG_P (inner_scalar) || MEM_P (inner_scalar)) + && GET_MODE (reg) != GET_MODE (inner_scalar)) + inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0); + rtx set = gen_rtx_SET (reg, inner_scalar); + insn = emit_insn_before (set, set_insn); + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\n"); + } + } +} + /* At entry of the nearest common dominator for basic blocks with conversions/rcp/sqrt/rsqrt/round, generate a single vxorps %xmmN, %xmmN, %xmmN @@ -3155,7 +3282,6 @@ remove_partial_avx_dependency (void) /* Generate an XMM vector SET. */ set = gen_rtx_SET (vec, src); set_insn = emit_insn_before (set, insn); - df_insn_rescan (set_insn); if (cfun->can_throw_non_call_exceptions) { @@ -3188,35 +3314,10 @@ remove_partial_avx_dependency (void) calculate_dominance_info (CDI_DOMINATORS); loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - /* Generate a vxorps at entry of the nearest dominator for basic - blocks with conversions, which is in the fake loop that - contains the whole function, so that there is only a single - vxorps in the whole function. */ - bb = nearest_common_dominator_for_set (CDI_DOMINATORS, - convert_bbs); - while (bb->loop_father->latch - != EXIT_BLOCK_PTR_FOR_FN (cfun)) - bb = get_immediate_dominator (CDI_DOMINATORS, - bb->loop_father->header); - - set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); - - insn = BB_HEAD (bb); - while (insn && !NONDEBUG_INSN_P (insn)) - { - if (insn == BB_END (bb)) - { - insn = NULL; - break; - } - insn = NEXT_INSN (insn); - } - if (insn == BB_HEAD (bb)) - set_insn = emit_insn_before (set, insn); - else - set_insn = emit_insn_after (set, - insn ? PREV_INSN (insn) : BB_END (bb)); - df_insn_rescan (set_insn); + ix86_place_single_vector_set (v4sf_const0, + CONST0_RTX (V4SFmode), + convert_bbs); + loop_optimizer_finalize (); if (!control_flow_insns.is_empty ()) @@ -3288,6 +3389,568 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) return new pass_remove_partial_avx_dependency (ctxt); } +/* Return a machine mode suitable for vector SIZE with SMODE inner + mode. */ + +static machine_mode +ix86_get_vector_cse_mode (unsigned int size, machine_mode smode) +{ + /* Use the inner scalar mode of vector broadcast source in: + + (set (reg:V8DF 394) + (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ]))) + + to compute the vector mode for broadcast from vector source. + */ + if (VECTOR_MODE_P (smode)) + smode = GET_MODE_INNER (smode); + scalar_mode s_mode = as_a <scalar_mode> (smode); + poly_uint64 nunits = size / GET_MODE_SIZE (smode); + machine_mode mode = mode_for_vector (s_mode, nunits).require (); + return mode; +} + +/* Replace the source operand of instructions in VECTOR_INSNS with + VECTOR_CONST in VECTOR_MODE. */ + +static void +replace_vector_const (machine_mode vector_mode, rtx vector_const, + auto_bitmap &vector_insns, + machine_mode scalar_mode) +{ + bitmap_iterator bi; + unsigned int id; + + EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (id)->insn; + + /* Get the single SET instruction. */ + rtx set = single_set (insn); + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + machine_mode mode = GET_MODE (dest); + + rtx replace; + /* Replace the source operand with VECTOR_CONST. 
*/ + if (SUBREG_P (src) || mode == vector_mode) + replace = vector_const; + else + { + unsigned int size = GET_MODE_SIZE (mode); + if (size < ix86_regmode_natural_size (mode)) + { + /* If the mode size is smaller than its natural size, + first insert an extra move with a QI vector SUBREG + of the same size to avoid validate_subreg failure. */ + machine_mode vmode + = ix86_get_vector_cse_mode (size, scalar_mode); + rtx vreg; + if (mode == vmode) + vreg = vector_const; + else + { + vreg = gen_reg_rtx (vmode); + rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); + rtx pat = gen_rtx_SET (vreg, vsubreg); + rtx_insn *vinsn = emit_insn_before (pat, insn); + if (dump_file) + { + fprintf (dump_file, "\nInsert an extra move:\n\n"); + print_rtl_single (dump_file, vinsn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + } + replace = gen_rtx_SUBREG (mode, vreg, 0); + } + else + replace = gen_rtx_SUBREG (mode, vector_const, 0); + } + + if (dump_file) + { + fprintf (dump_file, "\nReplace:\n\n"); + print_rtl_single (dump_file, insn); + } + SET_SRC (set) = replace; + /* Drop possible dead definitions. */ + PATTERN (insn) = set; + INSN_CODE (insn) = -1; + recog_memoized (insn); + if (dump_file) + { + fprintf (dump_file, "\nwith:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + df_insn_rescan (insn); + } +} + +enum x86_cse_kind +{ + X86_CSE_CONST0_VECTOR, + X86_CSE_CONSTM1_VECTOR, + X86_CSE_VEC_DUP +}; + +struct redundant_load +{ + /* Bitmap of basic blocks with broadcast instructions. */ + auto_bitmap bbs; + /* Bitmap of broadcast instructions. */ + auto_bitmap insns; + /* The broadcast inner scalar. */ + rtx val; + /* The inner scalar mode. */ + machine_mode mode; + /* The instruction which sets the inner scalar. Nullptr if the inner + scalar is applied to the whole function, instead of within the same + block. */ + rtx_insn *def_insn; + /* The widest broadcast source. */ + rtx broadcast_source; + /* The widest broadcast register. */ + rtx broadcast_reg; + /* The basic block of the broadcast instruction. */ + basic_block bb; + /* The number of broadcast instructions with the same inner scalar. */ + unsigned HOST_WIDE_INT count; + /* The threshold of broadcast instructions with the same inner + scalar. */ + unsigned int threshold; + /* The widest broadcast size in bytes. */ + unsigned int size; + /* Load kind. */ + x86_cse_kind kind; +}; + +/* Return the inner scalar if OP is a broadcast, else return nullptr. */ + +static rtx +ix86_broadcast_inner (rtx op, machine_mode mode, + machine_mode *scalar_mode_p, + x86_cse_kind *kind_p, rtx_insn **insn_p) +{ + if (op == const0_rtx || op == CONST0_RTX (mode)) + { + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONST0_VECTOR; + *insn_p = nullptr; + return const0_rtx; + } + else if ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && (op == constm1_rtx || op == CONSTM1_RTX (mode))) + || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && float_vector_all_ones_operand (op, mode))) + { + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONSTM1_VECTOR; + *insn_p = nullptr; + return constm1_rtx; + } + + mode = GET_MODE (op); + int nunits = GET_MODE_NUNITS (mode); + if (nunits < 2) + return nullptr; + + *kind_p = X86_CSE_VEC_DUP; + + rtx reg; + if (GET_CODE (op) == VEC_DUPLICATE) + { + /* Only + (vec_duplicate:V4SI (reg:SI 99)) + (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64])) + are supported. Set OP to the broadcast source by default. 
*/ + op = XEXP (op, 0); + reg = op; + if (SUBREG_P (op) + && SUBREG_BYTE (op) == 0 + && !paradoxical_subreg_p (op)) + reg = SUBREG_REG (op); + if (!REG_P (reg)) + { + if (MEM_P (op) + && SYMBOL_REF_P (XEXP (op, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) + { + /* Handle constant broadcast from memory. */ + *scalar_mode_p = GET_MODE_INNER (mode); + *insn_p = nullptr; + return op; + } + return nullptr; + } + } + else if (CONST_VECTOR_P (op)) + { + rtx first = XVECEXP (op, 0, 0); + for (int i = 1; i < nunits; ++i) + { + rtx tmp = XVECEXP (op, 0, i); + /* Vector duplicate value. */ + if (!rtx_equal_p (tmp, first)) + return nullptr; + } + *scalar_mode_p = GET_MODE (first); + *insn_p = nullptr; + return first; + } + else + return nullptr; + + mode = GET_MODE (op); + + /* Only single def chain is supported. */ + df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg)); + if (!ref + || DF_REF_IS_ARTIFICIAL (ref) + || DF_REF_NEXT_REG (ref) != nullptr) + return nullptr; + + rtx_insn *insn = DF_REF_INSN (ref); + rtx set = single_set (insn); + if (!set) + return nullptr; + + rtx src = SET_SRC (set); + + if (CONST_INT_P (src)) + { + /* Handle sequences like + + (set (reg:SI 99) + (const_int 34 [0x22])) + (set (reg:V4SI 98) + (vec_duplicate:V4SI (reg:SI 99))) + + Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an + integer constant. */ + op = src; + *insn_p = nullptr; + } + else + { + /* Handle sequences like + + (set (reg:QI 105 [ c ]) + (reg:QI 5 di [ c ])) + (set (reg:V64QI 102 [ _1 ]) + (vec_duplicate:V64QI (reg:QI 105 [ c ]))) + + (set (reg/v:SI 116 [ argc ]) + (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32])) + (set (reg:V4SI 119 [ _45 ]) + (vec_duplicate:V4SI (reg/v:SI 116 [ argc ]))) + + (set (reg:SI 98 [ _1 ]) + (sign_extend:SI (reg:QI 106 [ c ]))) + (set (reg:V16SI 103 [ _2 ]) + (vec_duplicate:V16SI (reg:SI 98 [ _1 ]))) + + (set (reg:SI 102 [ cost ]) + (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40]))) + (set (reg:V4HI 103 [ _16 ]) + (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0))) + + (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0) + (ashift:SI (reg:SI 158) + (subreg:QI (reg:SI 156 [ _2 ]) 0))) + (set (reg:V16HI 183 [ _61 ]) + (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ]))) + + Set *INSN_P to INSN and return the broadcast source otherwise. */ + *insn_p = insn; + } + + *scalar_mode_p = mode; + return op; +} + +/* At entry of the nearest common dominator for basic blocks with vector + CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest + vector set instruction for all CONST0_RTX and integer CONSTM1_RTX + uses. + + NB: We want to generate only a single widest vector set to cover the + whole function. The LCM algorithm isn't appropriate here since it + may place a vector set inside the loop. */ + +static unsigned int +remove_redundant_vector_load (void) +{ + timevar_push (TV_MACH_DEP); + + auto_vec<redundant_load *> loads; + redundant_load *load; + basic_block bb; + rtx_insn *insn; + unsigned int i; + + df_set_flags (DF_DEFER_INSN_RESCAN); + + bool recursive_call_p = cfun->machine->recursive_function; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + /* Record single set vector instruction with CONST0_RTX and + CONSTM1_RTX source. Record basic blocks with CONST0_RTX and + CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the + maximum size of CONST0_RTX and CONSTM1_RTX. 
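/* ix86_broadcast_inner, shown above, treats a CONST_VECTOR as a broadcast
   only when every element matches the first.  The same test on plain
   integers (a simplification of the rtx_equal_p loop):  */
#include <stdbool.h>
#include <stddef.h>

static bool
const_vector_is_splat (const long long *elts, size_t nunits)
{
  if (nunits < 2)
    return false;
  for (size_t i = 1; i < nunits; i++)
    if (elts[i] != elts[0])
      return false;
  return true;
}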
*/ + + rtx dest = SET_DEST (set); + machine_mode mode = GET_MODE (dest); + /* Skip non-vector instruction. */ + if (!VECTOR_MODE_P (mode)) + continue; + + rtx src = SET_SRC (set); + /* Skip non-vector load instruction. */ + if (!REG_P (dest) && !SUBREG_P (dest)) + continue; + + rtx_insn *def_insn; + machine_mode scalar_mode; + x86_cse_kind kind; + rtx val = ix86_broadcast_inner (src, mode, &scalar_mode, + &kind, &def_insn); + if (!val) + continue; + + /* Remove redundant register loads if there are more than 2 + loads will be used. */ + unsigned int threshold = 2; + + /* Check if there is a matching redundant vector load. */ + bool matched = false; + FOR_EACH_VEC_ELT (loads, i, load) + if (load->val + && load->kind == kind + && load->mode == scalar_mode + && (load->bb == bb + || kind < X86_CSE_VEC_DUP + /* Non all 0s/1s vector load must be in the same + basic block if it is in a recursive call. */ + || !recursive_call_p) + && rtx_equal_p (load->val, val)) + { + /* Record vector instruction. */ + bitmap_set_bit (load->insns, INSN_UID (insn)); + + /* Record the maximum vector size. */ + if (load->size < GET_MODE_SIZE (mode)) + load->size = GET_MODE_SIZE (mode); + + /* Record the basic block. */ + bitmap_set_bit (load->bbs, bb->index); + load->count++; + matched = true; + break; + } + + if (matched) + continue; + + /* We see this vector broadcast the first time. */ + load = new redundant_load; + + load->val = copy_rtx (val); + load->mode = scalar_mode; + load->size = GET_MODE_SIZE (mode); + load->def_insn = def_insn; + load->count = 1; + load->threshold = threshold; + load->bb = BLOCK_FOR_INSN (insn); + load->kind = kind; + + bitmap_set_bit (load->insns, INSN_UID (insn)); + bitmap_set_bit (load->bbs, bb->index); + + loads.safe_push (load); + } + } + + bool replaced = false; + rtx reg, broadcast_source, broadcast_reg; + FOR_EACH_VEC_ELT (loads, i, load) + if (load->count >= load->threshold) + { + machine_mode mode = ix86_get_vector_cse_mode (load->size, + load->mode); + broadcast_reg = gen_reg_rtx (mode); + if (load->def_insn) + { + /* Replace redundant vector loads with a single vector load + in the same basic block. */ + reg = load->val; + if (load->mode != GET_MODE (reg)) + reg = gen_rtx_SUBREG (load->mode, reg, 0); + broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); + replace_vector_const (mode, broadcast_reg, load->insns, + load->mode); + } + else + { + /* This is a constant integer/double vector. If the + inner scalar is 0 or -1, set vector to CONST0_RTX + or CONSTM1_RTX directly. */ + rtx reg; + switch (load->kind) + { + case X86_CSE_CONST0_VECTOR: + broadcast_source = CONST0_RTX (mode); + break; + case X86_CSE_CONSTM1_VECTOR: + broadcast_source = CONSTM1_RTX (mode); + break; + default: + reg = gen_reg_rtx (load->mode); + broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); + break; + } + replace_vector_const (mode, broadcast_reg, load->insns, + load->mode); + } + load->broadcast_source = broadcast_source; + load->broadcast_reg = broadcast_reg; + replaced = true; + } + + if (replaced) + { + auto_vec<rtx_insn *> control_flow_insns; + + /* (Re-)discover loops so that bb->loop_father can be used in the + analysis below. */ + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + FOR_EACH_VEC_ELT (loads, i, load) + if (load->count >= load->threshold) + { + if (load->def_insn) + { + /* Insert a broadcast after the original scalar + definition. 
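/* What the replacement step above amounts to, sketched at the intrinsics
   level (a user-visible analogy only; the pass itself rewrites RTL): once
   the same scalar is broadcast at least twice, one widest broadcast is kept
   and narrower uses read its low subreg.  */
#include <immintrin.h>

__attribute__ ((target ("avx")))
void
use_shared_broadcast (int c, __m128i *lo, __m256i *hi)
{
  __m256i bcast = _mm256_set1_epi32 (c);	/* single widest broadcast */
  *lo = _mm256_castsi256_si128 (bcast);		/* 128-bit use = low half */
  *hi = bcast;					/* 256-bit use reuses it */
}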
*/ + rtx set = gen_rtx_SET (load->broadcast_reg, + load->broadcast_source); + insn = emit_insn_after (set, load->def_insn); + + if (cfun->can_throw_non_call_exceptions) + { + /* Handle REG_EH_REGION note in DEF_INSN. */ + rtx note = find_reg_note (load->def_insn, + REG_EH_REGION, nullptr); + if (note) + { + control_flow_insns.safe_push (load->def_insn); + add_reg_note (insn, REG_EH_REGION, + XEXP (note, 0)); + } + } + + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, load->def_insn); + fprintf (dump_file, "\n"); + } + } + else + ix86_place_single_vector_set (load->broadcast_reg, + load->broadcast_source, + load->bbs, + (load->kind == X86_CSE_VEC_DUP + ? load->val + : nullptr)); + } + + loop_optimizer_finalize (); + + if (!control_flow_insns.is_empty ()) + { + free_dominance_info (CDI_DOMINATORS); + + FOR_EACH_VEC_ELT (control_flow_insns, i, insn) + if (control_flow_insn_p (insn)) + { + /* Split the block after insn. There will be a fallthru + edge, which is OK so we keep it. We have to create + the exception edges ourselves. */ + bb = BLOCK_FOR_INSN (insn); + split_block (bb, insn); + rtl_make_eh_edge (NULL, bb, BB_END (bb)); + } + } + + df_process_deferred_rescans (); + } + + df_clear_flags (DF_DEFER_INSN_RESCAN); + + timevar_pop (TV_MACH_DEP); + return 0; +} + +namespace { + +const pass_data pass_data_remove_redundant_vector_load = +{ + RTL_PASS, /* type */ + "rrvl", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_remove_redundant_vector_load : public rtl_opt_pass +{ +public: + pass_remove_redundant_vector_load (gcc::context *ctxt) + : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *fun) final override + { + return (TARGET_SSE2 + && optimize + && optimize_function_for_speed_p (fun)); + } + + unsigned int execute (function *) final override + { + return remove_redundant_vector_load (); + } +}; // class pass_remove_redundant_vector_load + +} // anon namespace + +rtl_opt_pass * +make_pass_remove_redundant_vector_load (gcc::context *ctxt) +{ + return new pass_remove_redundant_vector_load (ctxt); +} + /* Convert legacy instructions that clobbers EFLAGS to APX_NF instructions when there are no flag set between a flag producer and user. */ @@ -3962,7 +4625,6 @@ ix86_get_function_versions_dispatcher (void *decl) struct cgraph_node *node = NULL; struct cgraph_node *default_node = NULL; struct cgraph_function_version_info *node_v = NULL; - struct cgraph_function_version_info *first_v = NULL; tree dispatch_decl = NULL; @@ -3979,37 +4641,16 @@ ix86_get_function_versions_dispatcher (void *decl) if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; - /* Find the default version and make it the first node. */ - first_v = node_v; - /* Go to the beginning of the chain. */ - while (first_v->prev != NULL) - first_v = first_v->prev; - default_version_info = first_v; - while (default_version_info != NULL) - { - if (is_function_default_version - (default_version_info->this_node->decl)) - break; - default_version_info = default_version_info->next; - } + /* The default node is always the beginning of the chain. 
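/* The simplified lookup that follows relies on the default function version
   staying at the head of the doubly linked version chain.  Sketched with a
   plain list (illustrative struct, not cgraph_function_version_info):  */
#include <stdbool.h>
#include <stddef.h>

struct version_node { struct version_node *prev, *next; bool is_default; };

static struct version_node *
find_default_version (struct version_node *v)
{
  while (v->prev != NULL)
    v = v->prev;			/* the default is always first */
  return v->is_default ? v : NULL;	/* otherwise there is no default */
}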
*/ + default_version_info = node_v; + while (default_version_info->prev != NULL) + default_version_info = default_version_info->prev; + default_node = default_version_info->this_node; /* If there is no default node, just return NULL. */ - if (default_version_info == NULL) + if (!is_function_default_version (default_node->decl)) return NULL; - /* Make default info the first node. */ - if (first_v != default_version_info) - { - default_version_info->prev->next = default_version_info->next; - if (default_version_info->next) - default_version_info->next->prev = default_version_info->prev; - first_v->prev = default_version_info; - default_version_info->next = first_v; - default_version_info->prev = NULL; - } - - default_node = default_version_info->this_node; - #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) if (targetm.has_ifunc_p ()) { diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h index 24b0c4e..e3719b3 100644 --- a/gcc/config/i386/i386-features.h +++ b/gcc/config/i386/i386-features.h @@ -153,12 +153,13 @@ class scalar_chain bitmap insns_conv; hash_map<rtx, rtx> defs_map; - unsigned n_sse_to_integer; - unsigned n_integer_to_sse; + /* Cost of inserted conversion between ineteger and sse. */ + int cost_sse_integer; + sreal weighted_cost_sse_integer; auto_vec<rtx_insn *> control_flow_insns; bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); - virtual int compute_convert_gain () = 0; + virtual bool compute_convert_gain () = 0; int convert (); protected: @@ -184,11 +185,11 @@ class general_scalar_chain : public scalar_chain public: general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) : scalar_chain (smode_, vmode_) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void convert_insn (rtx_insn *insn) final override; - int vector_const_cost (rtx exp); + int vector_const_cost (rtx exp, basic_block bb); rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn); }; @@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain { public: timode_scalar_chain () : scalar_chain (TImode, V1TImode) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void fix_debug_reg_uses (rtx reg); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 19d78d7..6fa601d 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -118,8 +118,6 @@ DEF_PTA(SHA512) DEF_PTA(SM4) DEF_PTA(APX_F) DEF_PTA(USER_MSR) -DEF_PTA(EVEX512) -DEF_PTA(AVX10_1_256) DEF_PTA(AVX10_1) DEF_PTA(AVX10_2) DEF_PTA(AMX_AVX512) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index a9fac01..09cb133 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -259,9 +259,7 @@ static struct ix86_target_opts isa2_opts[] = { "-msm3", OPTION_MASK_ISA2_SM3 }, { "-msha512", OPTION_MASK_ISA2_SHA512 }, { "-msm4", OPTION_MASK_ISA2_SM4 }, - { "-mevex512", OPTION_MASK_ISA2_EVEX512 }, { "-musermsr", OPTION_MASK_ISA2_USER_MSR }, - { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 }, { "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 }, { "-mavx10.2", OPTION_MASK_ISA2_AVX10_2 }, { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 }, @@ -713,8 +711,6 @@ ix86_function_specific_save (struct cl_target_option *ptr, ptr->x_ix86_apx_features = opts->x_ix86_apx_features; ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; - 
ptr->x_ix86_no_avx512_explicit = opts->x_ix86_no_avx512_explicit; - ptr->x_ix86_no_avx10_1_explicit = opts->x_ix86_no_avx10_1_explicit; ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; ptr->x_ix86_arch_string = opts->x_ix86_arch_string; ptr->x_ix86_tune_string = opts->x_ix86_tune_string; @@ -764,63 +760,63 @@ static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = { /* This table must be in sync with enum processor_type in i386.h. */ static const struct processor_costs *processor_cost_table[] = { - &generic_cost, - &i386_cost, - &i486_cost, - &pentium_cost, - &lakemont_cost, - &pentiumpro_cost, - &pentium4_cost, - &nocona_cost, - &core_cost, - &core_cost, - &core_cost, - &core_cost, - &atom_cost, - &slm_cost, - &slm_cost, - &slm_cost, - &tremont_cost, - &alderlake_cost, - &alderlake_cost, - &alderlake_cost, - &skylake_cost, - &skylake_cost, - &icelake_cost, - &icelake_cost, - &icelake_cost, - &skylake_cost, - &icelake_cost, - &skylake_cost, - &icelake_cost, - &alderlake_cost, - &icelake_cost, - &icelake_cost, - &icelake_cost, - &alderlake_cost, - &alderlake_cost, - &alderlake_cost, - &icelake_cost, - &intel_cost, - &lujiazui_cost, - &yongfeng_cost, - &shijidadao_cost, - &geode_cost, - &k6_cost, - &athlon_cost, - &k8_cost, - &amdfam10_cost, - &bdver_cost, - &bdver_cost, - &bdver_cost, - &bdver_cost, - &btver1_cost, - &btver2_cost, - &znver1_cost, - &znver2_cost, - &znver3_cost, - &znver4_cost, - &znver5_cost + &generic_cost, /* PROCESSOR_GENERIC. */ + &i386_cost, /* PROCESSOR_I386. */ + &i486_cost, /* PROCESSOR_I486. */ + &pentium_cost, /* PROCESSOR_PENTIUM. */ + &lakemont_cost, /* PROCESSOR_LAKEMONT. */ + &pentiumpro_cost, /* PROCESSOR_PENTIUMPRO. */ + &pentium4_cost, /* PROCESSOR_PENTIUM4. */ + &nocona_cost, /* PROCESSOR_NOCONA. */ + &core_cost, /* PROCESSOR_CORE2. */ + &core_cost, /* PROCESSOR_NEHALEM. */ + &core_cost, /* PROCESSOR_SANDYBRIDGE. */ + &core_cost, /* PROCESSOR_HASWELL. */ + &atom_cost, /* PROCESSOR_BONNELL. */ + &slm_cost, /* PROCESSOR_SILVERMONT. */ + &slm_cost, /* PROCESSOR_GOLDMONT. */ + &slm_cost, /* PROCESSOR_GOLDMONT_PLUS. */ + &tremont_cost, /* PROCESSOR_TREMONT. */ + &alderlake_cost, /* PROCESSOR_SIERRAFOREST. */ + &alderlake_cost, /* PROCESSOR_GRANDRIDGE. */ + &alderlake_cost, /* PROCESSOR_CLEARWATERFOREST. */ + &skylake_cost, /* PROCESSOR_SKYLAKE. */ + &skylake_cost, /* PROCESSOR_SKYLAKE_AVX512. */ + &icelake_cost, /* PROCESSOR_CANNONLAKE. */ + &icelake_cost, /* PROCESSOR_ICELAKE_CLIENT. */ + &icelake_cost, /* PROCESSOR_ICELAKE_SERVER. */ + &skylake_cost, /* PROCESSOR_CASCADELAKE. */ + &icelake_cost, /* PROCESSOR_TIGERLAKE. */ + &skylake_cost, /* PROCESSOR_COOPERLAKE. */ + &icelake_cost, /* PROCESSOR_SAPPHIRERAPIDS. */ + &alderlake_cost, /* PROCESSOR_ALDERLAKE. */ + &icelake_cost, /* PROCESSOR_ROCKETLAKE. */ + &icelake_cost, /* PROCESSOR_GRANITERAPIDS. */ + &icelake_cost, /* PROCESSOR_GRANITERAPIDS_D. */ + &alderlake_cost, /* PROCESSOR_ARROWLAKE. */ + &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */ + &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */ + &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */ + &alderlake_cost, /* PROCESSOR_INTEL. */ + &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */ + &yongfeng_cost, /* PROCESSOR_YONGFENG. */ + &shijidadao_cost, /* PROCESSOR_SHIJIDADAO. */ + &geode_cost, /* PROCESSOR_GEODE. */ + &k6_cost, /* PROCESSOR_K6. */ + &athlon_cost, /* PROCESSOR_ATHLON. */ + &k8_cost, /* PROCESSOR_K8. */ + &amdfam10_cost, /* PROCESSOR_AMDFAM10. */ + &bdver_cost, /* PROCESSOR_BDVER1. */ + &bdver_cost, /* PROCESSOR_BDVER2. 
*/ + &bdver_cost, /* PROCESSOR_BDVER3. */ + &bdver_cost, /* PROCESSOR_BDVER4. */ + &btver1_cost, /* PROCESSOR_BTVER1. */ + &btver2_cost, /* PROCESSOR_BTVER2. */ + &znver1_cost, /* PROCESSOR_ZNVER1. */ + &znver2_cost, /* PROCESSOR_ZNVER2. */ + &znver3_cost, /* PROCESSOR_ZNVER3. */ + &znver4_cost, /* PROCESSOR_ZNVER4. */ + &znver5_cost /* PROCESSOR_ZNVER5. */ }; /* Guarantee that the array is aligned with enum processor_type. */ @@ -858,8 +854,6 @@ ix86_function_specific_restore (struct gcc_options *opts, opts->x_ix86_apx_features = ptr->x_ix86_apx_features; opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; - opts->x_ix86_no_avx512_explicit = ptr->x_ix86_no_avx512_explicit; - opts->x_ix86_no_avx10_1_explicit = ptr->x_ix86_no_avx10_1_explicit; opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; opts->x_ix86_arch_string = ptr->x_ix86_arch_string; opts->x_ix86_tune_string = ptr->x_ix86_tune_string; @@ -1131,11 +1125,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("sha512", OPT_msha512), IX86_ATTR_ISA ("sm4", OPT_msm4), IX86_ATTR_ISA ("apxf", OPT_mapxf), - IX86_ATTR_ISA ("evex512", OPT_mevex512), IX86_ATTR_ISA ("usermsr", OPT_musermsr), - IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1), - IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1), IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2), IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512), IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32), @@ -1271,13 +1262,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], } } - /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */ - if (opt == OPT_msse4 && !opt_set_p) - { - opt = OPT_mno_sse4; - opt_set_p = true; - } - /* Process the option. */ if (opt == N_OPTS) { @@ -1436,18 +1420,6 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, target_clone_attr)) return error_mark_node; - /* AVX10.1-256 will enable only 256 bit AVX512F features by setting all - AVX512 related ISA flags and not setting EVEX512. When it is used - with avx512 related function attribute, we need to enable 512 bit to - align with the command line behavior. Manually set EVEX512 for this - scenario. */ - if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256) - && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F) - && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) - && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512) - && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)) - opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512; - /* If the changed options are different from the default, rerun ix86_option_override_internal, and then save the options away. The string options are attribute options, and will be undone @@ -1458,10 +1430,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] || enum_opts_set.x_ix86_fpmath - || enum_opts_set.x_prefer_vector_width_type - || (!(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_1_256) - && (opts->x_ix86_isa_flags2_explicit - & OPTION_MASK_ISA2_AVX10_1_256))) + || enum_opts_set.x_prefer_vector_width_type) { /* If we are using the default tune= or arch=, undo the string assigned, and use the default. 
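For reference, a per-function ISA string as parsed by the IX86_ATTR_ISA table above might be written as follows (illustrative sketch only; the function name and body are made up and not part of the patch):

__attribute__ ((target ("avx10.2")))
void
scale_array (float *a, int n)
{
  /* May be vectorized with AVX10.2 instructions when profitable.  */
  for (int i = 0; i < n; i++)
    a[i] *= 2.0f;
}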
*/ @@ -2025,7 +1994,7 @@ ix86_option_override_internal (bool main_args_p, struct gcc_options *opts_set) { unsigned int i; - unsigned HOST_WIDE_INT ix86_arch_mask, avx512_isa_flags, avx512_isa_flags2; + unsigned HOST_WIDE_INT ix86_arch_mask; const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); /* -mrecip options. */ @@ -2044,15 +2013,6 @@ ix86_option_override_internal (bool main_args_p, { "vec-sqrt", RECIP_MASK_VEC_SQRT }, }; - avx512_isa_flags = OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD - | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW - | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA - | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2 - | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ - | OPTION_MASK_ISA_AVX512BITALG; - avx512_isa_flags2 = OPTION_MASK_ISA2_AVX512FP16 - | OPTION_MASK_ISA2_AVX512BF16; - /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) @@ -2674,107 +2634,6 @@ ix86_option_override_internal (bool main_args_p, &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) & ~opts->x_ix86_isa_flags_explicit); - /* Emit a warning if AVX10.1 options is used with AVX512/EVEX512 options except - for the following option combinations: - 1. Both AVX10.1-512 and AVX512 with 512 bit vector width are enabled with no - explicit disable on other AVX512 features. - 2. Both AVX10.1-256 and AVX512 w/o 512 bit vector width are enabled with no - explicit disable on other AVX512 features. - 3. Both AVX10.1 and AVX512 are disabled. */ - if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)) - { - if (opts->x_ix86_no_avx512_explicit - && (((~(avx512_isa_flags & opts->x_ix86_isa_flags) - & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit))) - || ((~((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512) - & opts->x_ix86_isa_flags2) - & ((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512) - & opts->x_ix86_isa_flags2_explicit))))) - warning (0, "%<-mno-evex512%> or %<-mno-avx512XXX%> cannot disable " - "AVX10 instructions when AVX10.1-512 is available in GCC 15, " - "behavior will change to it will disable that part of " - "AVX512 instructions since GCC 16"); - } - else if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2)) - { - if (TARGET_EVEX512_P (opts->x_ix86_isa_flags2) - && (OPTION_MASK_ISA2_EVEX512 & opts->x_ix86_isa_flags2_explicit)) - { - if (!TARGET_AVX512F_P (opts->x_ix86_isa_flags) - || !(OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit)) - { - /* We should not emit 512 bit instructions under AVX10.1-256 - when EVEX512 is enabled w/o any AVX512 features enabled. - Disable EVEX512 bit for this. 
*/ - warning (0, "Using %<-mevex512%> without any AVX512 features " - "enabled together with AVX10.1 only will not enable " - "any AVX512 or AVX10.1-512 features, using 256 as " - "max vector size"); - opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_EVEX512; - } - else - warning (0, "Vector size conflicts between AVX10.1 and AVX512, " - "using 512 as max vector size"); - } - else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) - && !(OPTION_MASK_ISA2_EVEX512 - & opts->x_ix86_isa_flags2_explicit)) - warning (0, "Vector size conflicts between AVX10.1 and AVX512, using " - "512 as max vector size"); - else if (opts->x_ix86_no_avx512_explicit - && (((~(avx512_isa_flags & opts->x_ix86_isa_flags) - & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit))) - || ((~(avx512_isa_flags2 & opts->x_ix86_isa_flags2) - & (avx512_isa_flags2 - & opts->x_ix86_isa_flags2_explicit))))) - warning (0, "%<-mno-avx512XXX%> cannot disable AVX10 instructions " - "when AVX10 is available in GCC 15, behavior will change " - "to it will disable that part of AVX512 instructions since " - "GCC 16"); - } - else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && (OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit)) - { - if (opts->x_ix86_no_avx10_1_explicit - && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1) - & opts->x_ix86_isa_flags2_explicit)) - { - warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> cannot disable " - "AVX512 instructions when %<-mavx512XXX%> in GCC 15, " - "behavior will change to it will disable all the " - "instructions in GCC 16"); - /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is - disabled. */ - if (OPTION_MASK_ISA2_AVX10_1_256 & opts->x_ix86_isa_flags2_explicit) - { - opts->x_ix86_isa_flags = (~avx512_isa_flags - & opts->x_ix86_isa_flags) - | (avx512_isa_flags & opts->x_ix86_isa_flags - & opts->x_ix86_isa_flags_explicit); - opts->x_ix86_isa_flags2 = (~avx512_isa_flags2 - & opts->x_ix86_isa_flags2) - | (avx512_isa_flags2 & opts->x_ix86_isa_flags2 - & opts->x_ix86_isa_flags2_explicit); - } - } - } - - /* Set EVEX512 if one of the following conditions meets: - 1. AVX512 is enabled while EVEX512 is not explicitly set/unset. - 2. AVX10.1-512 is enabled. */ - if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2) - || (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))) - opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512; - - /* Enable all AVX512 related ISAs when AVX10.1 is enabled. */ - if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2)) - { - opts->x_ix86_isa_flags |= avx512_isa_flags; - opts->x_ix86_isa_flags2 |= avx512_isa_flags2; - } - /* Validate -mpreferred-stack-boundary= value or default it to PREFERRED_STACK_BOUNDARY_DEFAULT. */ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; @@ -2828,8 +2687,8 @@ ix86_option_override_internal (bool main_args_p, if (flag_nop_mcount) error ("%<-mnop-mcount%> is not compatible with this target"); #endif - if (flag_nop_mcount && flag_pic) - error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); + if (flag_nop_mcount && flag_pic && !flag_plt) + error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>"); /* Accept -msseregparm only if at least SSE support is enabled. 
*/ if (TARGET_SSEREGPARM_P (opts->x_target_flags) @@ -3049,8 +2908,7 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_move_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_move_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_move_max = PVW_AVX512; /* Align with vectorizer to avoid potential STLF issue. */ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) @@ -3076,8 +2934,7 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_store_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_store_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_store_max = PVW_AVX512; /* Align with vectorizer to avoid potential STLF issue. */ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) @@ -3374,13 +3231,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node) case 'e': if (TARGET_PREFER_AVX256) { - if (!TARGET_AVX512F || !TARGET_EVEX512) - str = "avx512f,evex512,prefer-vector-width=512"; + if (!TARGET_AVX512F) + str = "avx512f,prefer-vector-width=512"; else str = "prefer-vector-width=512"; } - else if (!TARGET_AVX512F || !TARGET_EVEX512) - str = "avx512f,evex512"; + else if (!TARGET_AVX512F) + str = "avx512f"; break; default: gcc_unreachable (); @@ -3420,19 +3277,21 @@ ix86_set_func_type (tree fndecl) interrupt function in this case. */ enum call_saved_registers_type no_callee_saved_registers = TYPE_DEFAULT_CALL_SAVED_REGISTERS; - if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + if (lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + no_callee_saved_registers = TYPE_PRESERVE_NONE; + else if ((lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + || (ix86_noreturn_no_callee_saved_registers + && TREE_THIS_VOLATILE (fndecl) + && optimize + && !optimize_debug + && (TREE_NOTHROW (fndecl) || !flag_exceptions) + && !lookup_attribute ("interrupt", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) + && !lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS; - else if (ix86_noreturn_no_callee_saved_registers - && TREE_THIS_VOLATILE (fndecl) - && optimize - && !optimize_debug - && (TREE_NOTHROW (fndecl) || !flag_exceptions) - && !lookup_attribute ("interrupt", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) - && !lookup_attribute ("no_caller_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) - no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP; if (cfun->machine->func_type == TYPE_UNKNOWN) { @@ -3444,9 +3303,16 @@ ix86_set_func_type (tree fndecl) "interrupt and naked attributes are not compatible"); if (no_callee_saved_registers) - error_at (DECL_SOURCE_LOCATION (fndecl), - "%qs and %qs attributes are not compatible", - "interrupt", "no_callee_saved_registers"); + { + const char *attr; + if (no_callee_saved_registers == TYPE_PRESERVE_NONE) + attr = "preserve_none"; + else + attr = "no_callee_saved_registers"; + error_at (DECL_SOURCE_LOCATION (fndecl), + "%qs and %qs attributes are not compatible", + "interrupt", attr); + } int nargs = 0; for (tree arg = DECL_ARGUMENTS (fndecl); @@ -3468,21 +3334,13 @@ ix86_set_func_type (tree fndecl) else { cfun->machine->func_type = TYPE_NORMAL; - if (lookup_attribute 
("no_caller_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + if (no_callee_saved_registers) + cfun->machine->call_saved_registers + = no_callee_saved_registers; + else if (lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) cfun->machine->call_saved_registers = TYPE_NO_CALLER_SAVED_REGISTERS; - if (no_callee_saved_registers) - { - if (cfun->machine->call_saved_registers - == TYPE_NO_CALLER_SAVED_REGISTERS) - error_at (DECL_SOURCE_LOCATION (fndecl), - "%qs and %qs attributes are not compatible", - "no_caller_saved_registers", - "no_callee_saved_registers"); - cfun->machine->call_saved_registers - = no_callee_saved_registers; - } } } } @@ -3671,11 +3529,21 @@ ix86_set_current_function (tree fndecl) || (cfun->machine->call_saved_registers == TYPE_NO_CALLER_SAVED_REGISTERS)) { - /* Don't allow SSE, MMX nor x87 instructions since they - may change processor state. */ + /* Don't allow AVX, AVX512, MMX nor x87 instructions since they + may change processor state. Don't allow SSE instructions in + exception/interrupt service routines. */ const char *isa; if (TARGET_SSE) - isa = "SSE"; + { + if (TARGET_AVX512F) + isa = "AVX512"; + else if (TARGET_AVX) + isa = "AVX"; + else if (cfun->machine->func_type != TYPE_NORMAL) + isa = "SSE"; + else + isa = NULL; + } else if (TARGET_MMX) isa = "MMX/3Dnow"; else if (TARGET_80387) @@ -4100,9 +3968,50 @@ ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, } static tree -ix86_handle_call_saved_registers_attribute (tree *, tree, tree, +ix86_handle_call_saved_registers_attribute (tree *node, tree name, tree, int, bool *) { + const char *attr1 = nullptr; + const char *attr2 = nullptr; + + if (is_attribute_p ("no_callee_saved_registers", name)) + { + /* Disallow preserve_none and no_caller_saved_registers + attributes. */ + attr1 = "no_callee_saved_registers"; + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node))) + attr2 = "preserve_none"; + else if (lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_caller_saved_registers"; + } + else if (is_attribute_p ("no_caller_saved_registers", name)) + { + /* Disallow preserve_none and no_callee_saved_registers + attributes. */ + attr1 = "no_caller_saved_registers"; + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node))) + attr2 = "preserve_none"; + else if (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_callee_saved_registers"; + } + else if (is_attribute_p ("preserve_none", name)) + { + /* Disallow no_callee_saved_registers and no_caller_saved_registers + attributes. 
*/ + attr1 = "preserve_none"; + if (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_callee_saved_registers"; + else if (lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_caller_saved_registers"; + } + + if (attr2) + error ("%qs and %qs attributes are not compatible", attr1, attr2); + return NULL_TREE; } @@ -4264,6 +4173,8 @@ static const attribute_spec ix86_gnu_attributes[] = ix86_handle_interrupt_attribute, NULL }, { "no_caller_saved_registers", 0, 0, false, true, true, false, ix86_handle_call_saved_registers_attribute, NULL }, + { "preserve_none", 0, 0, false, true, true, true, + ix86_handle_call_saved_registers_attribute, NULL }, { "no_callee_saved_registers", 0, 0, false, true, true, true, ix86_handle_call_saved_registers_attribute, NULL }, { "naked", 0, 0, true, false, false, false, diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 39f8bc6..06f0288 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -35,5 +35,6 @@ along with GCC; see the file COPYING3. If not see PR116174. */ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); + INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load); INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index bea3fd4..69bc0ee 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -186,6 +186,7 @@ extern void ix86_expand_v2di_ashiftrt (rtx[]); extern rtx ix86_replace_reg_with_reg (rtx, rtx, rtx); extern rtx ix86_find_base_term (rtx); extern bool ix86_check_movabs (rtx, int); +extern bool ix86_check_movs (rtx, int); extern bool ix86_check_no_addr_space (rtx); extern void ix86_split_idivmod (machine_mode, rtx[], bool); extern bool ix86_hardreg_mov_ok (rtx, rtx); @@ -198,6 +199,7 @@ extern int ix86_attr_length_vex_default (rtx_insn *, bool, bool); extern rtx ix86_libcall_value (machine_mode); extern bool ix86_function_arg_regno_p (int); extern void ix86_asm_output_function_label (FILE *, const char *, tree); +extern void ix86_asm_output_labelref (FILE *, const char *, const char *); extern void ix86_call_abi_override (const_tree); extern int ix86_reg_parm_stack_space (const_tree); @@ -280,6 +282,7 @@ extern tree ix86_valid_target_attribute_tree (tree, tree, struct gcc_options *, struct gcc_options *, bool); extern unsigned int ix86_get_callcvt (const_tree); +extern bool ix86_type_no_callee_saved_registers_p (const_tree); #endif @@ -427,12 +430,21 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); +extern rtl_opt_pass *make_pass_remove_redundant_vector_load + (gcc::context *); extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); extern bool ix86_has_no_direct_extern_access; extern bool ix86_rpad_gate (); +extern sbitmap ix86_get_separate_components (void); +extern sbitmap ix86_components_for_bb (basic_block); +extern void ix86_disqualify_components (sbitmap, edge, sbitmap, bool); +extern void ix86_emit_prologue_components (sbitmap); +extern void ix86_emit_epilogue_components (sbitmap); +extern void ix86_set_handled_components (sbitmap); + /* In i386-expand.cc. 
*/ bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*, HOST_WIDE_INT*); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index b172f71..b64175d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -334,6 +335,14 @@ static int const x86_64_ms_abi_int_parameter_registers[4] = CX_REG, DX_REG, R8_REG, R9_REG }; +/* Similar as Clang's preserve_none function parameter passing. + NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p. */ + +static int const x86_64_preserve_none_int_parameter_registers[6] = +{ + R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG +}; + static int const x86_64_int_return_registers[4] = { AX_REG, DX_REG, DI_REG, SI_REG @@ -459,7 +468,8 @@ int ix86_arch_specified; red-zone. NB: Don't use red-zone for functions with no_caller_saved_registers - and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. + and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small + for 31 GPRs or 15 GPRs + 16 XMM registers. TODO: If we can reserve the first 2 WORDs, for PUSH and, another for CALL, in red-zone, we can allow local indirect jumps with @@ -470,7 +480,7 @@ ix86_using_red_zone (void) { return (TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI - && (!TARGET_APX_EGPR + && ((!TARGET_APX_EGPR && !TARGET_SSE) || (cfun->machine->call_saved_registers != TYPE_NO_CALLER_SAVED_REGISTERS)) && (!cfun->machine->has_local_indirect_jump @@ -897,6 +907,18 @@ x86_64_elf_unique_section (tree decl, int reloc) default_unique_section (decl, reloc); } +/* Return true if TYPE has no_callee_saved_registers or preserve_none + attribute. */ + +bool +ix86_type_no_callee_saved_registers_p (const_tree type) +{ + return (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (type)) != NULL + || lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (type)) != NULL); +} + #ifdef COMMON_ASM_OP #ifndef LARGECOMM_SECTION_ASM_OP @@ -1018,11 +1040,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) /* Sibling call isn't OK if callee has no callee-saved registers and the calling function has callee-saved registers. */ - if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS - && (cfun->machine->call_saved_registers - != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP) - && lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type))) + if ((cfun->machine->call_saved_registers + != TYPE_NO_CALLEE_SAVED_REGISTERS) + && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE + && ix86_type_no_callee_saved_registers_p (type)) return false; /* If outgoing reg parm stack space changes, we cannot do sibcall. */ @@ -1187,10 +1208,16 @@ ix86_comp_type_attributes (const_tree type1, const_tree type2) != ix86_function_regparm (type2, NULL)) return 0; - if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type1)) - != lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type2))) + if (ix86_type_no_callee_saved_registers_p (type1) + != ix86_type_no_callee_saved_registers_p (type2)) + return 0; + + /* preserve_none attribute uses a different calling convention is + only for 64-bit. 
*/ + if (TARGET_64BIT + && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1)) + != lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (type2)))) return 0; return 1; @@ -1552,7 +1579,10 @@ ix86_function_arg_regno_p (int regno) if (call_abi == SYSV_ABI && regno == AX_REG) return true; - if (call_abi == MS_ABI) + if (cfun + && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else if (call_abi == MS_ABI) parm_regs = x86_64_ms_abi_int_parameter_registers; else parm_regs = x86_64_int_parameter_registers; @@ -1715,6 +1745,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname, } } +/* Output a user-defined label. In AT&T syntax, registers are prefixed + with %, so labels require no punctuation. In Intel syntax, registers + are unprefixed, so labels may clash with registers or other operators, + and require quoting. */ +void +ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label) +{ + if (ASSEMBLER_DIALECT == ASM_ATT) + fprintf (file, "%s%s", prefix, label); + else + fprintf (file, "\"%s%s\"", prefix, label); +} + /* Implementation of call abi switching target hook. Specific to FNDECL the specific call register sets are set. See also ix86_conditional_register_usage for more details. */ @@ -1794,8 +1837,7 @@ ix86_init_pic_reg (void) add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); } - seq = get_insns (); - end_sequence (); + seq = end_sequence (); entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); insert_insn_on_edge (seq, entry_edge); @@ -1822,6 +1864,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ memset (cum, 0, sizeof (*cum)); + tree preserve_none_type; if (fndecl) { target = cgraph_node::get (fndecl); @@ -1830,12 +1873,24 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ target = target->function_symbol (); local_info_node = cgraph_node::local_info_node (target->decl); cum->call_abi = ix86_function_abi (target->decl); + preserve_none_type = TREE_TYPE (target->decl); } else - cum->call_abi = ix86_function_abi (fndecl); + { + cum->call_abi = ix86_function_abi (fndecl); + preserve_none_type = TREE_TYPE (fndecl); + } } else - cum->call_abi = ix86_function_type_abi (fntype); + { + cum->call_abi = ix86_function_type_abi (fntype); + preserve_none_type = fntype; + } + cum->preserve_none_abi + = (preserve_none_type + && (lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (preserve_none_type)) + != nullptr)); cum->caller = caller; @@ -1997,8 +2052,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) && GET_MODE_INNER (mode) == innermode) { - if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512) - && !TARGET_IAMCU) + if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) { static bool warnedavx512f; static bool warnedavx512f_ret; @@ -3409,9 +3463,15 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, break; } + const int *parm_regs; + if (cum->preserve_none_abi) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + return construct_container (mode, orig_mode, type, 0, cum->nregs, cum->sse_nregs, - &x86_64_int_parameter_registers [cum->regno], + &parm_regs[cum->regno], cum->sse_regno); } @@ -4421,7 +4481,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) /* AVX512F values are returned in ZMM0 if available. 
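A user-level sketch of the calling convention wired up above (illustrative only; the declarations are made up and not part of the patch): a 64-bit preserve_none function keeps no callee-saved registers and, per x86_64_preserve_none_int_parameter_registers, takes its integer arguments in R12, R13, R14, R15, RDI and RSI.

__attribute__ ((preserve_none)) void tail_dispatch (void *ctx, long op);

__attribute__ ((preserve_none)) void
handle (void *ctx, long op)
{
  /* Caller and callee must both use the preserve_none ABI.  */
  tail_dispatch (ctx, op);
}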
*/ if (size == 64) - return !TARGET_AVX512F || !TARGET_EVEX512; + return !TARGET_AVX512F; } if (mode == XFmode) @@ -4576,6 +4636,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) if (max > X86_64_REGPARM_MAX) max = X86_64_REGPARM_MAX; + const int *parm_regs; + if (cum->preserve_none_abi) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + for (i = cum->regno; i < max; i++) { mem = gen_rtx_MEM (word_mode, @@ -4583,8 +4649,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) MEM_NOTRAP_P (mem) = 1; set_mem_alias_set (mem, set); emit_move_insn (mem, - gen_rtx_REG (word_mode, - x86_64_int_parameter_registers[i])); + gen_rtx_REG (word_mode, parm_regs[i])); } if (ix86_varargs_fpr_size) @@ -4738,8 +4803,7 @@ ix86_va_start (tree valist, rtx nextarg) start_sequence (); emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); push_topmost_sequence (); emit_insn_after (seq, entry_of_function ()); @@ -5179,6 +5243,27 @@ ix86_check_movabs (rtx insn, int opnum) return volatile_ok || !MEM_VOLATILE_P (mem); } +/* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */ +bool +ix86_check_movs (rtx insn, int idx) +{ + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + + rtx set = XVECEXP (pat, 0, idx); + gcc_assert (GET_CODE (set) == SET); + + rtx dst = SET_DEST (set); + gcc_assert (MEM_P (dst)); + + rtx src = SET_SRC (set); + gcc_assert (MEM_P (src)); + + return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)) + && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)) + || Pmode == word_mode)); +} + /* Return false if INSN contains a MEM with a non-default address space. */ bool ix86_check_no_addr_space (rtx insn) @@ -5355,7 +5440,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode) switch (GET_MODE_SIZE (mode)) { case 64: - if (TARGET_AVX512F && TARGET_EVEX512) + if (TARGET_AVX512F) return 2; break; case 32: @@ -5408,10 +5493,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vpxord\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxord\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxord\t%g0, %g0, %g0"; } return "vpxor\t%x0, %x0, %x0"; @@ -5427,19 +5510,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vxorpd\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vxorpd\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vxorpd\t%g0, %g0, %g0"; } else { if (TARGET_AVX512VL) return "vpxorq\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxorq\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxorq\t%g0, %g0, %g0"; } } return "vxorpd\t%x0, %x0, %x0"; @@ -5456,19 +5535,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vxorps\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vxorps\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vxorps\t%g0, %g0, %g0"; } else { if (TARGET_AVX512VL) return "vpxord\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxord\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxord\t%g0, %g0, %g0"; } } return "vxorps\t%x0, %x0, %x0"; @@ -5489,7 +5564,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) case MODE_XI: case MODE_V8DF: case MODE_V16SF: - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; case MODE_OI: @@ -5505,10 +5580,8 @@ 
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; - else if (TARGET_EVEX512) - return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; else - gcc_unreachable (); + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; } return (TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" @@ -5522,7 +5595,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (GET_MODE_SIZE (mode) == 64) { - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpcmpeqd\t%t0, %t0, %t0"; } else if (GET_MODE_SIZE (mode) == 32) @@ -5534,7 +5607,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) } else if (vector_all_ones_zero_extend_quarter_operand (x, mode)) { - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpcmpeqd\t%x0, %x0, %x0"; } @@ -5645,8 +5718,6 @@ ix86_get_ssemov (rtx *operands, unsigned size, || memory_operand (operands[1], mode)) gcc_unreachable (); size = 64; - /* We need TARGET_EVEX512 to move into zmm register. */ - gcc_assert (TARGET_EVEX512); switch (type) { case opcode_int: @@ -5685,7 +5756,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -5727,7 +5798,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu8" : "%vmovdqu") : "%vmovdqa"); @@ -5747,7 +5818,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -6700,9 +6771,7 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) || !frame_pointer_needed)); case TYPE_NO_CALLEE_SAVED_REGISTERS: - return false; - - case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP: + case TYPE_PRESERVE_NONE: if (regno != HARD_FRAME_POINTER_REGNUM) return false; break; @@ -6779,7 +6848,9 @@ ix86_nsaved_sseregs (void) int nregs = 0; int regno; - if (!TARGET_64BIT_MS_ABI) + if (!TARGET_64BIT_MS_ABI + && (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) return 0; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) @@ -6887,6 +6958,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) && (nregs + aligned) >= 3; } +/* Check if push/pop should be used to save/restore registers. */ +static bool +save_regs_using_push_pop (HOST_WIDE_INT to_allocate) +{ + return ((!to_allocate && cfun->machine->frame.nregs <= 1) + || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) + /* If static stack checking is enabled and done with probes, + the registers need to be saved before allocating the frame. */ + || flag_stack_check == STATIC_BUILTIN_STACK_CHECK + /* If stack clash probing needs a loop, then it needs a + scratch register. But the returned register is only guaranteed + to be safe to use after register saves are complete. So if + stack clash protections are enabled and the allocated frame is + larger than the probe interval, then use pushes to save + callee saved registers. */ + || (flag_stack_clash_protection + && !ix86_target_stack_probe () + && to_allocate > get_probe_interval ())); +} + /* Fill structure ix86_frame about frame of currently computed function. 
*/ static void @@ -6967,12 +7058,18 @@ ix86_compute_frame_layout (void) gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); gcc_assert (preferred_alignment <= stack_alignment_needed); - /* The only ABI saving SSE regs should be 64-bit ms_abi. */ - gcc_assert (TARGET_64BIT || !frame->nsseregs); + /* The only ABI saving SSE regs should be 64-bit ms_abi or with + no_caller_saved_registers attribue. */ + gcc_assert (TARGET_64BIT + || (cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + || !frame->nsseregs); if (TARGET_64BIT && m->call_ms2sysv) { gcc_assert (stack_alignment_needed >= 16); - gcc_assert (!frame->nsseregs); + gcc_assert ((cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + || !frame->nsseregs); } /* For SEH we have to limit the amount of code movement into the prologue. @@ -7171,20 +7268,7 @@ ix86_compute_frame_layout (void) /* Size prologue needs to allocate. */ to_allocate = offset - frame->sse_reg_save_offset; - if ((!to_allocate && frame->nregs <= 1) - || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) - /* If static stack checking is enabled and done with probes, - the registers need to be saved before allocating the frame. */ - || flag_stack_check == STATIC_BUILTIN_STACK_CHECK - /* If stack clash probing needs a loop, then it needs a - scratch register. But the returned register is only guaranteed - to be safe to use after register saves are complete. So if - stack clash protections are enabled and the allocated frame is - larger than the probe interval, then use pushes to save - callee saved registers. */ - || (flag_stack_clash_protection - && !ix86_target_stack_probe () - && to_allocate > get_probe_interval ())) + if (save_regs_using_push_pop (to_allocate)) frame->save_regs_using_mov = false; if (ix86_using_red_zone () @@ -7642,7 +7726,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { - ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + /* Skip registers, already processed by shrink wrap separate. */ + if (!cfun->machine->reg_is_wrapped_separately[regno]) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); cfa_offset -= UNITS_PER_WORD; } } @@ -7735,8 +7821,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, add_frame_related_expr = true; } - insn = emit_insn (gen_pro_epilogue_adjust_stack_add - (Pmode, dest, src, addend)); + /* Shrink wrap separate may insert prologue between TEST and JMP. In order + not to affect EFlags, emit add without reg clobbering. */ + if (crtl->shrink_wrapped_separate) + insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc + (Pmode, dest, src, addend)); + else + insn = emit_insn (gen_pro_epilogue_adjust_stack_add + (Pmode, dest, src, addend)); + if (style >= 0) ix86_add_queued_cfa_restore_notes (insn); @@ -7920,6 +8013,15 @@ ix86_update_stack_boundary (void) if (ix86_tls_descriptor_calls_expanded_in_cfun && crtl->preferred_stack_boundary < 128) crtl->preferred_stack_boundary = 128; + + /* For 32-bit MS ABI, both the incoming and preferred stack boundaries + are 32 bits, but if force_align_arg_pointer is specified, it should + prefer 128 bits for a backward-compatibility reason, which is also + what the doc suggests. 
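A minimal sketch of the attribute involved here (illustrative only; the function is made up and not part of the patch): force_align_arg_pointer realigns the stack on entry, and the surrounding hunk makes the preferred stack boundary reflect that (128 bits) for such functions on 32-bit MS ABI targets.

__attribute__ ((force_align_arg_pointer))
void
use_aligned_locals (void)
{
  float v[4] __attribute__ ((aligned (16))) = { 0.f, 1.f, 2.f, 3.f };
  (void) v;
}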
*/ + if (lookup_attribute ("force_align_arg_pointer", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) + && crtl->preferred_stack_boundary < 128) + crtl->preferred_stack_boundary = 128; } /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is @@ -7950,8 +8052,7 @@ ix86_get_drap_rtx (void) start_sequence (); drap_vreg = copy_to_reg (arg_ptr); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); if (!optimize) @@ -8472,6 +8573,128 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Data passed to ix86_update_stack_alignment. */ +struct stack_access_data +{ + /* The stack access register. */ + const_rtx reg; + /* Pointer to stack alignment. */ + unsigned int *stack_alignment; +}; + +/* Update the maximum stack slot alignment from memory alignment in PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment if the memory is referenced by the stack access register. */ + stack_access_data *p = (stack_access_data *) data; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + { + auto op = *iter; + if (MEM_P (op)) + { + if (reg_mentioned_p (p->reg, XEXP (op, 0))) + { + unsigned int alignment = MEM_ALIGN (op); + + if (alignment > *p->stack_alignment) + *p->stack_alignment = alignment; + break; + } + else + iter.skip_subrtxes (); + } + } +} + +/* Helper function for ix86_find_all_reg_uses. */ + +static void +ix86_find_all_reg_uses_1 (HARD_REG_SET ®set, + rtx set, unsigned int regno, + auto_bitmap &worklist) +{ + rtx dest = SET_DEST (set); + + if (!REG_P (dest)) + return; + + /* Reject non-Pmode modes. */ + if (GET_MODE (dest) != Pmode) + return; + + unsigned int dst_regno = REGNO (dest); + + if (TEST_HARD_REG_BIT (regset, dst_regno)) + return; + + const_rtx src = SET_SRC (set); + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, src, ALL) + { + auto op = *iter; + + if (MEM_P (op)) + iter.skip_subrtxes (); + + if (REG_P (op) && REGNO (op) == regno) + { + /* Add this register to register set. */ + add_to_hard_reg_set (®set, Pmode, dst_regno); + bitmap_set_bit (worklist, dst_regno); + break; + } + } +} + +/* Find all registers defined with register REGNO. */ + +static void +ix86_find_all_reg_uses (HARD_REG_SET ®set, + unsigned int regno, auto_bitmap &worklist) +{ + for (df_ref ref = DF_REG_USE_CHAIN (regno); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + unsigned int ref_regno = DF_REF_REGNO (ref); + + rtx set = single_set (insn); + if (set) + { + ix86_find_all_reg_uses_1 (regset, set, + ref_regno, worklist); + continue; + } + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx exp = XVECEXP (pat, 0, i); + + if (GET_CODE (exp) == SET) + ix86_find_all_reg_uses_1 (regset, exp, + ref_regno, worklist); + } + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. 
*/ @@ -8490,10 +8713,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8505,27 +8724,67 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + auto_bitmap worklist; + + add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM); + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); + + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); + } + + unsigned int regno; + + do + { + regno = bitmap_clear_first_set_bit (worklist); + ix86_find_all_reg_uses (stack_slot_access, regno, worklist); + } + while (!bitmap_empty_p (worklist)); + + hard_reg_set_iterator hrsi; + stack_access_data data; + + data.stack_alignment = &stack_alignment; + + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi) + for (df_ref ref = DF_REG_USE_CHAIN (regno); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + data.reg = DF_REF_REG (ref); + note_stores (insn, ix86_update_stack_alignment, &data); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which @@ -9035,11 +9294,22 @@ ix86_expand_prologue (void) doing this if we have to probe the stack; at least on x86_64 the stack probe can turn into a call that clobbers a red zone location. */ else if (ix86_using_red_zone () - && (! TARGET_STACK_PROBE - || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) + && (! TARGET_STACK_PROBE + || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) { + HOST_WIDE_INT allocate_offset; + if (crtl->shrink_wrapped_separate) + { + allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset; + + /* Adjust the total offset at the beginning of the function. 
*/ + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (allocate_offset), -1, + m->fs.cfa_reg == stack_pointer_rtx); + m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset; + } + ix86_emit_save_regs_using_mov (frame.reg_save_offset); - cfun->machine->red_zone_used = true; int_registers_saved = true; } } @@ -9617,30 +9887,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) { - rtx reg = gen_rtx_REG (word_mode, regno); - rtx mem; - rtx_insn *insn; - - mem = choose_baseaddr (cfa_offset, NULL); - mem = gen_frame_mem (word_mode, mem); - insn = emit_move_insn (reg, mem); - if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) + /* Skip registers, already processed by shrink wrap separate. */ + if (!cfun->machine->reg_is_wrapped_separately[regno]) { - /* Previously we'd represented the CFA as an expression - like *(%ebp - 8). We've just popped that value from - the stack, which means we need to reset the CFA to - the drap register. This will remain until we restore - the stack pointer. */ - add_reg_note (insn, REG_CFA_DEF_CFA, reg); - RTX_FRAME_RELATED_P (insn) = 1; + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; - /* This means that the DRAP register is valid for addressing. */ - m->fs.drap_valid = true; - } - else - ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + mem = choose_baseaddr (cfa_offset, NULL); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + if (m->fs.cfa_reg == crtl->drap_reg + && regno == REGNO (crtl->drap_reg)) + { + /* Previously we'd represented the CFA as an expression + like *(%ebp - 8). We've just popped that value from + the stack, which means we need to reset the CFA to + the drap register. This will remain until we restore + the stack pointer. */ + add_reg_note (insn, REG_CFA_DEF_CFA, reg); + RTX_FRAME_RELATED_P (insn) = 1; + + /* DRAP register is valid for addressing. */ + m->fs.drap_valid = true; + } + else + ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + } cfa_offset -= UNITS_PER_WORD; } } @@ -9919,10 +10194,11 @@ ix86_expand_epilogue (int style) less work than reloading sp and popping the register. */ else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) restore_regs_via_mov = true; - else if (TARGET_EPILOGUE_USING_MOVE - && cfun->machine->use_fast_prologue_epilogue - && (frame.nregs > 1 - || m->fs.sp_offset != reg_save_offset)) + else if (crtl->shrink_wrapped_separate + || (TARGET_EPILOGUE_USING_MOVE + && cfun->machine->use_fast_prologue_epilogue + && (frame.nregs > 1 + || m->fs.sp_offset != reg_save_offset))) restore_regs_via_mov = true; else if (frame_pointer_needed && !frame.nregs @@ -9936,6 +10212,9 @@ ix86_expand_epilogue (int style) else restore_regs_via_mov = false; + if (crtl->shrink_wrapped_separate) + gcc_assert (restore_regs_via_mov); + if (restore_regs_via_mov || frame.nsseregs) { /* Ensure that the entire register save area is addressable via @@ -9988,6 +10267,7 @@ ix86_expand_epilogue (int style) gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); gcc_assert (!crtl->drap_reg); gcc_assert (!frame.nregs); + gcc_assert (!crtl->shrink_wrapped_separate); } else if (restore_regs_via_mov) { @@ -10002,6 +10282,8 @@ ix86_expand_epilogue (int style) rtx sa = EH_RETURN_STACKADJ_RTX; rtx_insn *insn; + gcc_assert (!crtl->shrink_wrapped_separate); + /* Stack realignment doesn't work with eh_return. 
*/ if (crtl->stack_realign_needed) sorry ("Stack realignment not supported with " @@ -11183,6 +11465,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) x = XVECEXP (x, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + x = XVECEXP (x, 0, 0); + return GET_CODE (x) == SYMBOL_REF; default: return false; } @@ -11230,7 +11515,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) case E_OImode: case E_XImode: if (!standard_sse_constant_p (x, mode) - && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512 + && GET_MODE_SIZE (TARGET_AVX512F ? XImode : (TARGET_AVX ? OImode @@ -11319,6 +11604,9 @@ legitimate_pic_operand_p (rtx x) x = XVECEXP (inner, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_SECREL32: + x = XVECEXP (inner, 0, 0); + return GET_CODE (x) == SYMBOL_REF; case UNSPEC_MACHOPIC_OFFSET: return legitimate_pic_address_disp_p (x); default: @@ -11499,6 +11787,9 @@ legitimate_pic_address_disp_p (rtx disp) disp = XVECEXP (disp, 0, 0); return (GET_CODE (disp) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + disp = XVECEXP (disp, 0, 0); + return GET_CODE (disp) == SYMBOL_REF; } return false; @@ -11776,6 +12067,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, case UNSPEC_INDNTPOFF: case UNSPEC_NTPOFF: case UNSPEC_DTPOFF: + case UNSPEC_SECREL32: break; default: @@ -11801,7 +12093,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF - && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32)) /* Non-constant pic memory reference. */ return false; } @@ -12125,6 +12418,24 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg) return tp; } +/* Construct the SYMBOL_REF for the _tls_index symbol. */ + +static GTY(()) rtx ix86_tls_index_symbol; + +#if TARGET_WIN32_TLS +static rtx +ix86_tls_index (void) +{ + if (!ix86_tls_index_symbol) + ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index"); + + if (flag_pic) + return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL)); + else + return ix86_tls_index_symbol; +} +#endif + /* Construct the SYMBOL_REF for the tls_get_addr function. */ static GTY(()) rtx ix86_tls_symbol; @@ -12183,6 +12494,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) machine_mode tp_mode = Pmode; int type; +#if TARGET_WIN32_TLS + off = gen_const_mem (SImode, ix86_tls_index ()); + set_mem_alias_set (off, GOT_ALIAS_SET); + + tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44)); + set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG); + + if (TARGET_64BIT) + off = convert_to_mode (Pmode, off, 1); + + base = force_reg (Pmode, off); + tp = copy_to_mode_reg (Pmode, tp); + + tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD)))); + set_mem_alias_set (tp, GOT_ALIAS_SET); + + base = force_reg (Pmode, tp); + + return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))); +#else /* Fall back to global dynamic model if tool chain cannot support local dynamic. 
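The address produced by the new TARGET_WIN32_TLS path above can be summarized in plain C (a sketch assuming the 88/44 constants are the ThreadLocalStoragePointer slot in the TEB; tls_slots and secrel_off are made-up names, and only _tls_index is a real linker-provided symbol):

extern unsigned int _tls_index;   /* Module's index into the per-thread TLS array.  */

static inline void *
win32_tls_address (char **tls_slots, unsigned long secrel_off)
{
  /* tls_slots stands for TEB->ThreadLocalStoragePointer, which the generated
     code loads from gs:[88] (64-bit) or fs:[44] (32-bit); secrel_off is the
     variable's @secrel32 section-relative offset.  */
  return tls_slots[_tls_index] + secrel_off;
}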
*/ if (TARGET_SUN_TLS && !TARGET_64BIT @@ -12231,13 +12562,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx rdi = gen_rtx_REG (Pmode, DI_REG); rtx_insn *insns; start_sequence (); emit_call_insn - (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); - insns = get_insns (); - end_sequence (); + (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi)); + insns = end_sequence (); if (GET_MODE (x) != Pmode) x = gen_rtx_ZERO_EXTEND (Pmode, x); @@ -12285,14 +12616,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx rdi = gen_rtx_REG (Pmode, DI_REG); rtx_insn *insns; rtx eqv; start_sequence (); emit_call_insn - (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); - insns = get_insns (); - end_sequence (); + (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi)); + insns = end_sequence (); /* Attach a unique REG_EQUAL, to allow the RTL optimizers to share the LD_BASE result with other LD model accesses. */ @@ -12405,6 +12736,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) } return dest; +#endif } /* Return true if the TLS address requires insn using integer registers. @@ -12874,6 +13206,9 @@ output_pic_addr_const (FILE *file, rtx x, int code) case UNSPEC_INDNTPOFF: fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: putc ('-', file); @@ -12899,7 +13234,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x) { fputs (ASM_LONG, file); output_addr_const (file, x); +#if TARGET_WIN32_TLS + fputs ("@secrel32", file); +#else fputs ("@dtpoff", file); +#endif switch (size) { case 4: @@ -13558,10 +13897,11 @@ print_reg (rtx x, int code, FILE *file) H -- print a memory address offset by 8; used for sse high-parts Y -- print condition for XOP pcom* instruction. V -- print naked full integer register name without %. + v -- print segment override prefix + -- print a branch hint as 'cs' or 'ds' prefix ; -- print a semicolon (after prefixes due to bug in older gas). ~ -- print "i" if TARGET_AVX2, "f" otherwise. - ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode + ^ -- print addr32 prefix if Pmode != word_mode M -- print addr32 prefix for TARGET_X32 with VSIB address. ! -- print NOTRACK prefix for jxx/call/ret instructions if required. N -- print maskz if it's constant 0 operand. 
@@ -14063,6 +14403,28 @@ ix86_print_operand (FILE *file, rtx x, int code) return; + case 'v': + if (MEM_P (x)) + { + switch (MEM_ADDR_SPACE (x)) + { + case ADDR_SPACE_GENERIC: + break; + case ADDR_SPACE_SEG_FS: + fputs ("fs ", file); + break; + case ADDR_SPACE_SEG_GS: + fputs ("gs ", file); + break; + default: + gcc_unreachable (); + } + } + else + output_operand_lossage ("operand is not a memory reference, " + "invalid operand code 'v'"); + return; + case '*': if (ASSEMBLER_DIALECT == ASM_ATT) putc ('*', file); @@ -14137,7 +14499,7 @@ ix86_print_operand (FILE *file, rtx x, int code) return; case '^': - if (TARGET_64BIT && Pmode != word_mode) + if (Pmode != word_mode) fputs ("addr32 ", file); return; @@ -14652,6 +15014,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) output_addr_const (file, op); fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + output_addr_const (file, op); + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: output_addr_const (file, op); @@ -17904,9 +18270,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) if (cum->decl && !TREE_PUBLIC (cum->decl)) return; - const_tree ctx = get_ultimate_context (cum->decl); - if (ctx != NULL_TREE - && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) + tree decl = cum->decl; + if (!decl) + /* If we don't know the target, look at the current TU. */ + decl = current_function_decl; + + const_tree ctx = get_ultimate_context (decl); + if (ctx == NULL_TREE + || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) return; /* If the actual size of the type is zero, then there is no change @@ -20043,14 +20414,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype, { bool si; enum ix86_builtins code; - const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype)); if (!TARGET_AVX512F) return NULL_TREE; - if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64) - return NULL_TREE; - if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u) ? !TARGET_USE_SCATTER_2PARTS : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) @@ -20793,7 +21160,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, return true; /* x87 registers can't do subreg at all, as all values are reformatted - to extended precision. */ + to extended precision. + + ??? middle-end queries mode changes for ALL_REGS and this makes + vec_series_lowpart_p to always return false. We probably should + restrict this to modes supported by i387 and check if it is enabled. */ if (MAYBE_FLOAT_CLASS_P (regclass)) return false; @@ -21168,7 +21539,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - any of 512-bit wide vector mode - any scalar mode. */ if (TARGET_AVX512F - && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512) + && ((VALID_AVX512F_REG_OR_XI_MODE (mode)) || VALID_AVX512F_SCALAR_MODE (mode))) return true; @@ -21339,19 +21710,20 @@ ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) return mode1 == SFmode; /* If MODE2 is only appropriate for an SSE register, then tie with - any other mode acceptable to SSE registers. 
*/ - if (GET_MODE_SIZE (mode2) == 64 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 64 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - if (GET_MODE_SIZE (mode2) == 32 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 32 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - if (GET_MODE_SIZE (mode2) == 16 + any vector modes or scalar floating point modes acceptable to SSE + registers, excluding scalar integer modes with SUBREG: + (subreg:QI (reg:TI 99) 0)) + (subreg:HI (reg:TI 99) 0)) + (subreg:SI (reg:TI 99) 0)) + (subreg:DI (reg:TI 99) 0)) + to avoid unnecessary move from SSE register to integer register. + */ + if (GET_MODE_SIZE (mode2) >= 16 + && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2) + || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1)) + && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2))) && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 16 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); + return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); /* If MODE2 is appropriate for an MMX register, then tie with any other mode acceptable to MMX registers. */ @@ -21409,7 +21781,7 @@ ix86_set_reg_reg_cost (machine_mode mode) case MODE_VECTOR_INT: case MODE_VECTOR_FLOAT: - if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) + if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) @@ -21470,7 +21842,7 @@ ix86_widen_mult_cost (const struct processor_costs *cost, /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, require extra 4 mul, 4 add, 4 cmp and 2 shift. */ if (!TARGET_SSE4_1 && !uns_p) - extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4 + extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4 + cost->sse_op * 2; /* Fallthru. */ case V4DImode: @@ -21520,11 +21892,11 @@ ix86_multiplication_cost (const struct processor_costs *cost, else if (TARGET_AVX2) nops += 2; else if (TARGET_XOP) - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; else { nops += 1; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } goto do_qimode; @@ -21543,13 +21915,13 @@ ix86_multiplication_cost (const struct processor_costs *cost, { nmults += 1; nops += 2; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } else { nmults += 1; nops += 4; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } goto do_qimode; @@ -21562,14 +21934,16 @@ ix86_multiplication_cost (const struct processor_costs *cost, { nmults += 1; nops += 4; - extra += cost->sse_load[3] * 2; + /* 2 loads, so no division by 2. */ + extra += COSTS_N_INSNS (cost->sse_load[3]); } goto do_qimode; case V64QImode: nmults = 2; nops = 9; - extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2; + /* 2 loads of each size, so no division by 2. */ + extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]); do_qimode: return ix86_vec_cost (mode, cost->mulss * nmults @@ -21662,7 +22036,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, /* Use vpbroadcast. */ extra = cost->sse_op; else - extra = cost->sse_load[2]; + extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; if (constant_op1) { @@ -21693,7 +22067,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, shift with one insn set the cost to prefer paddb. 
*/ if (constant_op1) { - extra = cost->sse_load[2]; + extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; return ix86_vec_cost (mode, cost->sse_op) + extra; } else @@ -21708,7 +22082,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, /* Use vpbroadcast. */ extra = cost->sse_op; else - extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; + extra = COSTS_N_INSNS (mode == V16QImode + ? cost->sse_load[2] + : cost->sse_load[3]) / 2; if (constant_op1) { @@ -21816,6 +22192,34 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + +/* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */ + +static bool +unspec_pcmp_p (rtx x) +{ + return GET_CODE (x) == UNSPEC + && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -21833,9 +22237,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* Handling different vternlog variants. */ if ((GET_MODE_SIZE (mode) == 64 - ? (TARGET_AVX512F && TARGET_EVEX512) + ? TARGET_AVX512F : (TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256))) && GET_MODE_SIZE (mode) >= 16 && outer_code_i == SET && ternlog_operand (x, mode)) @@ -22184,8 +22588,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (ior (not ...) ...) can be a single insn in AVX512. */ if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22276,8 +22679,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* (and (not ...) (not ...)) can be a single insn in AVX512. */ if (GET_CODE (right) == NOT && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22347,8 +22749,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (not (xor ...)) can be a single insn in AVX512. */ if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22479,17 +22880,39 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. 
*/
       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	*total = 0;
       else
-	*total = ix86_vec_cost (mode, cost->addss);
+	*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
       return false;

     case FLOAT_TRUNCATE:
       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
	*total = cost->fadd;
       else
-	*total = ix86_vec_cost (mode, cost->addss);
+	*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
+      return false;
+
+    case FLOAT:
+    case UNSIGNED_FLOAT:
+      if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	/* TODO: We do not have cost tables for x87.  */
+	*total = cost->fadd;
+      else if (VECTOR_MODE_P (mode))
+	*total = ix86_vec_cost (mode, cost->cvtpi2ps);
+      else
+	*total = cost->cvtsi2ss;
+      return false;
+
+    case FIX:
+    case UNSIGNED_FIX:
+      if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	/* TODO: We do not have cost tables for x87.  */
+	*total = cost->fadd;
+      else if (VECTOR_MODE_P (mode))
+	*total = ix86_vec_cost (mode, cost->cvtps2pi);
+      else
+	*total = cost->cvtss2si;
       return false;

     case ABS:
@@ -22550,13 +22973,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
	}
       return false;

-    case VEC_SELECT:
     case VEC_CONCAT:
       /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
-	 same cost.  */
+	 same cost.
+	 ??? We should still recurse when computing cost.  */
       *total = cost->sse_op;
       return true;
+
+    case VEC_SELECT:
+      /* Special case extracting lower part from the vector.
+	 This by itself needs no code, and most SSE/AVX instructions have
+	 packed and single forms where the single form may be represented
+	 by such VEC_SELECT.
+
+	 Use cost 1 (despite the fact that functionally equivalent SUBREG has
+	 cost 0).  Making VEC_SELECT completely free, for example, instructs
+	 CSE to forward propagate VEC_SELECT into
+
+	 (set (reg eax) (reg src))
+
+	 which then prevents fwprop and combining.  See e.g.
+	 gcc.target/i386/pr91103-1.c.
+
+	 ??? The rtvec_series_p test should be, for valid patterns, equivalent
+	 to vec_series_lowpart_p but is not, since the latter calls
+	 can_change_mode_class on ALL_REGS and this returns false since x87
+	 does not support subregs at all.  */
+      if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+	*total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+			   outer_code, opno, speed) + 1;
+      else
+	/* ??? We should still recurse when computing cost.  */
+	*total = cost->sse_op;
+      return true;
+
     case VEC_DUPLICATE:
       *total = rtx_cost (XEXP (x, 0),
			 GET_MODE (XEXP (x, 0)),
@@ -22569,13 +23020,87 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,

     case VEC_MERGE:
       mask = XEXP (x, 2);
+      /* Scalar versions of SSE instructions may be represented as:
+
+	 (vec_merge (vec_duplicate (operation ....))
+		    (register or memory)
+		    (const_int 1))
+
+	 In this case vec_merge and vec_duplicate are free.
+	 Just recurse into operation and second operand.  */
+      if (mask == const1_rtx
+	  && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+	{
+	  *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+			     outer_code, opno, speed)
+		   + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+	  return true;
+	}
       /* This is masked instruction, assume the same cost,
	 as nonmasked variant.  */
-      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
-	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
+      else if (TARGET_AVX512F
+	       && (register_operand (mask, GET_MODE (mask))
+		   /* Redundant cleanup of high bits for kmask with VL=2/4,
+		      i.e. (vec_merge op0, op1, (and op3 15)).
*/ + || (GET_CODE (mask) == AND + && register_operand (XEXP (mask, 0), GET_MODE (mask)) + && CONST_INT_P (XEXP (mask, 1)) + && ((INTVAL (XEXP (mask, 1)) == 3 + && GET_MODE_NUNITS (mode) == 2) + || (INTVAL (XEXP (mask, 1)) == 15 + && GET_MODE_NUNITS (mode) == 4))))) + { + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* Combination of the two above: + + (vec_merge (vec_merge (vec_duplicate (operation ...)) + (register or memory) + (reg:QI mask)) + (register or memory) + (const_int 1)) + + i.e. avx512fp16_vcvtss2sh_mask. */ + else if (TARGET_AVX512F + && mask == const1_rtx + && GET_CODE (XEXP (x, 0)) == VEC_MERGE + && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE + && register_operand (XEXP (XEXP (x, 0), 2), + GET_MODE (XEXP (XEXP (x, 0), 2)))) + { + *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (XEXP (x, 0), 1), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* vcmp. */ + else if (unspec_pcmp_p (mask) + || (GET_CODE (mask) == NOT + && unspec_pcmp_p (XEXP (mask, 0)))) + { + rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; + rtx unsop0 = XVECEXP (uns, 0, 0); + /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) + cost the same as register. + This is used by avx_cmp<mode>3_ltint_not. */ + if (GET_CODE (unsop0) == SUBREG) + unsop0 = XEXP (unsop0, 0); + if (GET_CODE (unsop0) == NOT) + unsop0 = XEXP (unsop0, 0); + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) + + rtx_cost (unsop0, mode, UNSPEC, opno, speed) + + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) + + cost->sse_op; + return true; + } else *total = cost->sse_op; - return true; + return false; case MEM: /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. @@ -22592,7 +23117,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } /* An insn that accesses memory is slightly more expensive - than one that does not. */ + than one that does not. */ if (speed) { *total += 1; @@ -22833,7 +23358,9 @@ x86_this_parameter (tree function) { const int *parm_regs; - if (ix86_function_type_abi (type) == MS_ABI) + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type))) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else if (ix86_function_type_abi (type) == MS_ABI) parm_regs = x86_64_ms_abi_int_parameter_registers; else parm_regs = x86_64_int_parameter_registers; @@ -23159,13 +23686,21 @@ x86_field_alignment (tree type, int computed) /* Print call to TARGET to FILE. 
*/ static void -x86_print_call_or_nop (FILE *file, const char *target) +x86_print_call_or_nop (FILE *file, const char *target, + const char *label) { if (flag_nop_mcount || !strcmp (target, "nop")) /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ - fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); + fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n", + label); + else if (!TARGET_PECOFF && flag_pic) + { + gcc_assert (flag_plt); + + fprintf (file, "%s\tcall\t%s@PLT\n", label, target); + } else - fprintf (file, "1:\tcall\t%s\n", target); + fprintf (file, "%s\tcall\t%s\n", label, target); } static bool @@ -23250,6 +23785,13 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) const char *mcount_name = MCOUNT_NAME; + bool fentry_section_p + = (flag_record_mcount + || lookup_attribute ("fentry_section", + DECL_ATTRIBUTES (current_function_decl))); + + const char *label = fentry_section_p ? "1:" : ""; + if (current_fentry_name (&mcount_name)) ; else if (fentry_name) @@ -23285,11 +23827,12 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) reg = legacy_reg; } if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n" - "\tcall\t%s\n", reg, mcount_name, reg); + fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n" + "\tcall\t%s\n", label, reg, mcount_name, + reg); else - fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n", - mcount_name, reg, reg); + fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n", + label, mcount_name, reg, reg); break; case CM_LARGE_PIC: #ifdef NO_PROFILE_COUNTERS @@ -23327,24 +23870,24 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) break; case CM_SMALL_PIC: case CM_MEDIUM_PIC: - if (!ix86_direct_extern_access) + if (!flag_plt) { if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", - mcount_name); + fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", + label, mcount_name); else - fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", - mcount_name); + fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n", + label, mcount_name); break; } /* fall through */ default: - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); break; } } else - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); } else if (flag_pic) { @@ -23358,10 +23901,14 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name); + if (flag_plt) + x86_print_call_or_nop (file, mcount_name, label); + else if (ASSEMBLER_DIALECT == ASM_INTEL) + fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n", + label, mcount_name); else - fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); + fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n", + label, mcount_name); } else { @@ -23374,12 +23921,10 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); } - if (flag_record_mcount - || lookup_attribute ("fentry_section", - DECL_ATTRIBUTES (current_function_decl))) + if (fentry_section_p) { const char *sname = "__mcount_loc"; @@ -24138,7 +24683,7 @@ ix86_vector_mode_supported_p (machine_mode mode) return 
true; if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) return true; - if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) + if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) return true; if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode)) @@ -24386,8 +24931,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, } } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (saw_asm_flag) return seq; @@ -24675,7 +25219,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. Recompute @@ -24746,7 +25290,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -24759,12 +25307,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* One vinserti128 for combining two SSE vectors for AVX256. */ else if (GET_MODE_BITSIZE (mode) == 256) return ((n - 2) * ix86_cost->sse_op - + ix86_vec_cost (mode, ix86_cost->addss)); + + ix86_vec_cost (mode, ix86_cost->sse_op)); /* One vinserti64x4 and two vinserti128 for combining SSE and AVX256 vectors to AVX512. */ else if (GET_MODE_BITSIZE (mode) == 512) - return ((n - 4) * ix86_cost->sse_op - + 3 * ix86_vec_cost (mode, ix86_cost->addss)); + { + machine_mode half_mode + = mode_for_vector (GET_MODE_INNER (mode), + GET_MODE_NUNITS (mode) / 2).require (); + return ((n - 4) * ix86_cost->sse_op + + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op) + + ix86_vec_cost (mode, ix86_cost->sse_op)); + } gcc_unreachable (); } @@ -24932,7 +25486,7 @@ ix86_preferred_simd_mode (scalar_mode mode) switch (mode) { case E_QImode: - if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) return V64QImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V32QImode; @@ -24940,7 +25494,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V16QImode; case E_HImode: - if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) return V32HImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V16HImode; @@ -24948,7 +25502,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V8HImode; case E_SImode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SImode; @@ -24956,7 +25510,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SImode; case E_DImode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V8DImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DImode; @@ -24970,16 +25524,15 @@ ix86_preferred_simd_mode (scalar_mode mode) { if (TARGET_PREFER_AVX128) return V8HFmode; - else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512) + else if (TARGET_PREFER_AVX256) return V16HFmode; } - if (TARGET_EVEX512) - return V32HFmode; + return V32HFmode; } return word_mode; case E_BFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && 
!TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V32BFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V16BFmode; @@ -24987,7 +25540,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V8BFmode; case E_SFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SFmode; @@ -24995,7 +25548,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SFmode; case E_DFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V8DFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DFmode; @@ -25015,13 +25568,13 @@ ix86_preferred_simd_mode (scalar_mode mode) static unsigned int ix86_autovectorize_vector_modes (vector_modes *modes, bool all) { - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) { modes->safe_push (V64QImode); modes->safe_push (V32QImode); modes->safe_push (V16QImode); } - else if (TARGET_AVX512F && TARGET_EVEX512 && all) + else if (TARGET_AVX512F && all) { modes->safe_push (V32QImode); modes->safe_push (V16QImode); @@ -25059,7 +25612,7 @@ ix86_get_mask_mode (machine_mode data_mode) unsigned elem_size = vector_size / nunits; /* Scalar mask case. */ - if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) + if ((TARGET_AVX512F && vector_size == 64) || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)) /* AVX512FP16 only supports vector comparison to kmask for _Float16. */ @@ -25267,7 +25820,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; else - stmt_cost = ix86_cost->add; + stmt_cost = ix86_cost->add; } else stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss @@ -25322,7 +25875,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (subcode == RSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op1))) ? ASHIFTRT : LSHIFTRT, mode, - TREE_CODE (op2) == INTEGER_CST, + TREE_CODE (op2) == INTEGER_CST, cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1, false, false, NULL, NULL); @@ -25331,27 +25884,174 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case NOP_EXPR: /* Only sign-conversions are free. */ if (tree_nop_conversion_p - (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + break; + + case FLOAT_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtsi2ss; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + break; + + case FIX_TRUNC_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtss2si; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + break; + + case COND_EXPR: + { + /* SSE2 conditinal move sequence is: + pcmpgtd %xmm5, %xmm0 (accounted separately) + pand %xmm0, %xmm2 + pandn %xmm1, %xmm0 + por %xmm2, %xmm0 + while SSE4 uses cmp + blend + and AVX512 masked moves. 
+
+	 The condition is accounted separately since we usually have
+	   p = a < b
+	   c = p ? x : y
+	 and we will account the first statement as setcc.  The exception is
+	 when p is loaded from memory as bool; then we will not account
+	 the compare, but there is no way to check for this.  */
+
+	int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+	/* If one of the parameters is 0 or -1, the sequence will be
+	   simplified:
+	   (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
+	if (ninsns > 1
+	    && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+		|| zerop (gimple_assign_rhs3 (stmt_info->stmt))
+		|| integer_minus_onep
+		     (gimple_assign_rhs2 (stmt_info->stmt))
+		|| integer_minus_onep
+		     (gimple_assign_rhs3 (stmt_info->stmt))))
+	  ninsns = 1;
+
+	if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	  stmt_cost = ninsns * ix86_cost->sse_op;
+	else if (X87_FLOAT_MODE_P (mode))
+	  /* x87 requires conditional branch.  We don't have cost for
+	     that.  */
+	  ;
+	else if (VECTOR_MODE_P (mode))
+	  stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+	else
+	  /* compare (accounted separately) + cmov.  */
+	  stmt_cost = ix86_cost->add;
+      }
       break;

-    case BIT_IOR_EXPR:
-    case ABS_EXPR:
-    case ABSU_EXPR:
     case MIN_EXPR:
     case MAX_EXPR:
+      if (fp)
+	{
+	  if (X87_FLOAT_MODE_P (mode)
+	      && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	    /* x87 requires conditional branch.  We don't have cost for
+	       that.  */
+	    ;
+	  else
+	    /* minss  */
+	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+	}
+      else
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+	      /* vpmin was introduced in SSE3.
+		 SSE2 needs pcmpgtd + pand + pandn + pxor.
+		 If one of the parameters is 0 or -1, the sequence is
+		 simplified to pcmpgtd + pand.  */
+	      if (!TARGET_SSSE3)
+		{
+		  if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+		      || integer_minus_onep
+			   (gimple_assign_rhs2 (stmt_info->stmt)))
+		    stmt_cost *= 2;
+		  else
+		    stmt_cost *= 4;
+		}
+	    }
+	  else
+	    /* cmp + cmov.  */
+	    stmt_cost = ix86_cost->add * 2;
+	}
+      break;
+
+    case ABS_EXPR:
+    case ABSU_EXPR:
+      if (fp)
+	{
+	  if (X87_FLOAT_MODE_P (mode)
+	      && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	    /* fabs.  */
+	    stmt_cost = ix86_cost->fabs;
+	  else
+	    /* andss of sign bit.  */
+	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+	}
+      else
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+	      /* vabs was introduced in SSSE3.
+		 SSE2 uses psra + pxor + psub.  */
+	      if (!TARGET_SSSE3)
+		stmt_cost *= 3;
+	    }
+	  else
+	    /* neg + cmov.  */
+	    stmt_cost = ix86_cost->add * 2;
+	}
+      break;
+
+    case BIT_IOR_EXPR:
     case BIT_XOR_EXPR:
     case BIT_AND_EXPR:
     case BIT_NOT_EXPR:
-      if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
-	stmt_cost = ix86_cost->sse_op;
-      else if (VECTOR_MODE_P (mode))
+      gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+		  && !X87_FLOAT_MODE_P (mode));
+      if (VECTOR_MODE_P (mode))
	stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
       else
	stmt_cost = ix86_cost->add;
       break;
+
+    default:
+      if (truth_value_p (subcode))
+	{
+	  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+	    /* CMPccS? instructions are cheap, so use sse_op.  While they
+	       produce a mask which may need to be turned to 0/1 by and,
+	       expect that this will be optimized away in a common case.  */
+	    stmt_cost = ix86_cost->sse_op;
+	  else if (X87_FLOAT_MODE_P (mode))
+	    /* fcmp + setcc.  */
+	    stmt_cost = ix86_cost->fadd + ix86_cost->add;
+	  else if (VECTOR_MODE_P (mode))
+	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+	  else
+	    /* setcc.
*/ + stmt_cost = ix86_cost->add; + break; + } break; } } @@ -25375,6 +26075,37 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote) + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + bool inner_fp = FLOAT_TYPE_P + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))); + + if (fp && inner_fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + else if (fp && !inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + else if (!fp && inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is + greater than inner size we will end up doing two conversions and + packing them. We always pack pairs; if the size difference is greater + it is split into multiple demote operations. */ + if (inner_size > outer_size) + stmt_cost = stmt_cost * 2 + + ix86_vec_cost (mode, ix86_cost->sse_op); + } + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the @@ -25445,7 +26176,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else { m_num_gpr_needed[where]++; - stmt_cost += ix86_cost->sse_to_integer; + + int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + + /* For integer construction, the number of actual GPR -> XMM + moves will be somewhere between 0 and n. + We do not have very good idea about actual number, since + the source may be a constant, memory or a chain of + instructions that will be later converted by + scalar-to-vector pass. */ + if (kind == vec_construct + && GET_MODE_BITSIZE (mode) == 256) + cost *= 2; + else if (kind == vec_construct + && GET_MODE_BITSIZE (mode) == 512) + cost *= 3; + stmt_cost += cost; } } } @@ -25537,14 +26283,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both a AVX2 and a SSE epilogue for AVX512 vectorized loops. */ if (loop_vinfo + && LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32 && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES]) - { - if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64) - m_suggested_epilogue_mode = V32QImode; - else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32) - m_suggested_epilogue_mode = V16QImode; - } + m_suggested_epilogue_mode = V16QImode; /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger enable a 64bit SSE epilogue. */ if (loop_vinfo @@ -25672,7 +26414,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, { /* If the function isn't exported, we can pick up just one ISA for the clones. */ - if (TARGET_AVX512F && TARGET_EVEX512) + if (TARGET_AVX512F) clonei->vecsize_mangle = 'e'; else if (TARGET_AVX2) clonei->vecsize_mangle = 'd'; @@ -25764,17 +26506,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode) return -1; if (!TARGET_AVX) return 0; - return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1; + return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; case 'c': if (!TARGET_AVX) return -1; - return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 
1 : 0; + return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; case 'd': if (!TARGET_AVX2) return -1; - return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0; + return TARGET_AVX512F ? 1 : 0; case 'e': - if (!TARGET_AVX512F || !TARGET_EVEX512) + if (!TARGET_AVX512F) return -1; return 0; default: @@ -27446,6 +28188,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn) #undef TARGET_DOCUMENTATION_NAME #define TARGET_DOCUMENTATION_NAME "x86" +/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ +sbitmap +ix86_get_separate_components (void) +{ + HOST_WIDE_INT offset, to_allocate; + sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); + bitmap_clear (components); + struct machine_function *m = cfun->machine; + + offset = m->frame.stack_pointer_offset; + to_allocate = offset - m->frame.sse_reg_save_offset; + + /* Shrink wrap separate uses MOV, which means APX PPX cannot be used. + Experiments show that APX PPX can speed up the prologue. If the function + does not exit early during actual execution, then using APX PPX is faster. + If the function always exits early during actual execution, then shrink + wrap separate reduces the number of MOV (PUSH/POP) instructions actually + executed, thus speeding up execution. + foo: + movl $1, %eax + testq %rdi, %rdi + jne.L60 + ret ---> early return. + .L60: + subq $88, %rsp ---> belong to prologue. + xorl %eax, %eax + movq %rbx, 40 (%rsp) ---> belong to prologue. + movq 8 (%rdi), %rbx + movq %rbp, 48 (%rsp) ---> belong to prologue. + movq %rdi, %rbp + testq %rbx, %rbx + jne.L61 + movq 40 (%rsp), %rbx + movq 48 (%rsp), %rbp + addq $88, %rsp + ret + .L61: + movq %r12, 56 (%rsp) ---> belong to prologue. + movq %r13, 64 (%rsp) ---> belong to prologue. + movq %r14, 72 (%rsp) ---> belong to prologue. + ... ... + + Disable shrink wrap separate when PPX is enabled. */ + if ((TARGET_APX_PPX && !crtl->calls_eh_return) + || cfun->machine->func_type != TYPE_NORMAL + || TARGET_SEH + || crtl->stack_realign_needed + || m->call_ms2sysv) + return components; + + /* Since shrink wrapping separate uses MOV instead of PUSH/POP. + Disable shrink wrap separate when MOV is prohibited. */ + if (save_regs_using_push_pop (to_allocate)) + return components; + + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + /* Skip registers with large offsets, where a pseudo may be needed. */ + if (IN_RANGE (offset, -0x8000, 0x7fff)) + bitmap_set_bit (components, regno); + offset += UNITS_PER_WORD; + } + + /* Don't mess with the following registers. */ + if (frame_pointer_needed) + bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + + if (crtl->drap_reg) + bitmap_clear_bit (components, REGNO (crtl->drap_reg)); + + if (pic_offset_table_rtx) + bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ +sbitmap +ix86_components_for_bb (basic_block bb) +{ + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; + + sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); + bitmap_clear (components); + + function_abi_aggregator callee_abis; + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (CALL_P (insn)) + callee_abis.note_callee_abi (insn_callee_abi (insn)); + HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); + + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. 
*/ + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (!fixed_regs[regno] + && (TEST_HARD_REG_BIT (extra_caller_saves, regno) + || bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno))) + bitmap_set_bit (components, regno); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ +void +ix86_disqualify_components (sbitmap, edge, sbitmap, bool) +{ + /* Nothing to do for x86. */ +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ +void +ix86_emit_prologue_components (sbitmap components) +{ + HOST_WIDE_INT cfa_offset; + struct machine_function *m = cfun->machine; + + cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset + - m->frame.stack_pointer_offset; + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ +void +ix86_emit_epilogue_components (sbitmap components) +{ + HOST_WIDE_INT cfa_offset; + struct machine_function *m = cfun->machine; + cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset + - m->frame.stack_pointer_offset; + + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + { + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; + + mem = choose_baseaddr (cfa_offset, NULL); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, reg); + } + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ +void +ix86_set_handled_components (sbitmap components) +{ + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (bitmap_bit_p (components, regno)) + { + cfun->machine->reg_is_wrapped_separately[regno] = true; + cfun->machine->use_fast_prologue_epilogue = true; + cfun->machine->frame.save_regs_using_mov = true; + } +} + +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components +#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB +#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS +#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \ + ix86_emit_prologue_components +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \ + ix86_emit_epilogue_components +#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS +#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243..3f7ad68 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -179,6 +179,7 @@ struct processor_costs { const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */ zmm_move; const int sse_to_integer; /* cost of moving SSE register to integer. */ + const int integer_to_sse; /* cost of moving integer register to SSE. 
*/ const int gather_static, gather_per_elt; /* Cost of gather load is computed as static + per_item * nelts. */ const int scatter_static, scatter_per_elt; /* Cost of gather store is @@ -207,6 +208,16 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ + const int cvtsi2ss; /* cost of CVTSI2SS instruction. */ + const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */ + const int cvtpi2ps; /* cost of CVTPI2PS instruction. */ + const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp @@ -479,7 +490,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_SSE_MOVCC_USE_BLENDV \ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV] #define TARGET_ALIGN_TIGHT_LOOPS \ - ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS] + ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS] +#define TARGET_SSE_REDUCTION_PREFER_PSHUF \ + ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF] /* Feature tests against the various architecture variations. */ @@ -525,6 +538,7 @@ extern unsigned char ix86_prefetch_sse; #define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2) #define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS) #define TARGET_SUN_TLS 0 +#define TARGET_WIN32_TLS 0 #ifndef TARGET_64BIT_DEFAULT #define TARGET_64BIT_DEFAULT 0 @@ -804,7 +818,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */ #define BIGGEST_ALIGNMENT \ - (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \ + (TARGET_IAMCU ? 32 : (TARGET_AVX512F \ ? 512 : (TARGET_AVX ? 256 : 128))) /* Maximum stack alignment. */ @@ -1682,6 +1696,8 @@ typedef struct ix86_args { int stdarg; /* Set to 1 if function is stdarg. */ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. */ + bool preserve_none_abi; /* Set to true if the preserve_none ABI is + used. */ tree decl; /* Callee decl. */ } CUMULATIVE_ARGS; @@ -1883,7 +1899,7 @@ typedef struct ix86_args { MOVE_MAX_PIECES defaults to MOVE_MAX. */ #define MOVE_MAX \ - ((TARGET_AVX512F && TARGET_EVEX512\ + ((TARGET_AVX512F \ && (ix86_move_max == PVW_AVX512 \ || ix86_store_max == PVW_AVX512)) \ ? 64 \ @@ -1902,7 +1918,7 @@ typedef struct ix86_args { store_by_pieces of 16/32/64 bytes. */ #define STORE_MAX_PIECES \ (TARGET_INTER_UNIT_MOVES_TO_VEC \ - ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \ + ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \ ? 64 \ : ((TARGET_AVX \ && ix86_store_max >= PVW_AVX256) \ @@ -2255,6 +2271,13 @@ extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; } while (0) #endif +/* In Intel syntax, we have to quote user-defined labels that would + match (unprefixed) registers or operators. 
*/ + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + ix86_asm_output_labelref ((STREAM), user_label_prefix, (NAME)) + /* Under some conditions we need jump tables in the text section, because the assembler cannot handle label differences between sections. */ @@ -2396,13 +2419,13 @@ constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX; constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU - | PTA_CLWB | PTA_EVEX512; + | PTA_CLWB; constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512 | PTA_AVX512VNNI; constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16; constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU - | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_EVEX512; + | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA; constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG | PTA_RDPID | PTA_AVX512VPOPCNTDQ; @@ -2425,14 +2448,16 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 - | PTA_PREFETCHI; + | PTA_PREFETCHI | PTA_AVX10_1; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS | PTA_AMX_COMPLEX; constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST; @@ -2444,16 +2469,11 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST | PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR | PTA_PREFETCHI; constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI; -constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA - | PTA_GFNI | PTA_VAES | PTA_VPCLMULQDQ | PTA_RDPID | PTA_PCONFIG - | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD - | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK - | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI - | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256 - | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 - | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 - | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 - | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; +constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D + | PTA_AVXIFMA | 
PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8 + | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 + | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE + | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 @@ -2480,7 +2500,7 @@ constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ | PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL | PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI - | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512; + | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ; constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI; @@ -2782,11 +2802,13 @@ enum call_saved_registers_type or "no_caller_saved_registers" attribute. */ TYPE_NO_CALLER_SAVED_REGISTERS, /* The current function is a function specified with the - "no_callee_saved_registers" attribute. */ + "no_callee_saved_registers" attribute or a function specified with + the "noreturn" attribute when compiled with + "-mnoreturn-no-callee-saved-registers". */ TYPE_NO_CALLEE_SAVED_REGISTERS, - /* The current function is a function specified with the "noreturn" - attribute. */ - TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP, + /* The current function is a function specified with the + "preserve_none" attribute. */ + TYPE_PRESERVE_NONE, }; enum queued_insn_type @@ -2805,6 +2827,10 @@ struct GTY(()) machine_function { /* Cached initial frame layout for the current function. */ struct ix86_frame frame; + /* The components already handled by separate shrink-wrapping, which should + not be considered by the prologue and epilogue. */ + bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER]; + /* For -fsplit-stack support: A stack local which holds a pointer to the stack arguments for a function with a variable number of arguments. This is set at the start of the function and is used @@ -2859,7 +2885,7 @@ struct GTY(()) machine_function { ENUM_BITFIELD(indirect_branch) function_return_type : 3; /* Call saved registers type. */ - ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 2; + ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 3; /* If true, there is register available for argument passing. This is used only in ix86_function_ok_for_sibcall by 32-bit to determine @@ -2904,6 +2930,9 @@ struct GTY(()) machine_function { /* True if inline asm with redzone clobber has been seen. */ BOOL_BITFIELD asm_redzone_clobber_seen : 1; + /* True if this is a recursive function. */ + BOOL_BITFIELD recursive_function : 1; + /* The largest alignment, in bytes, of stack slot actually used. */ unsigned int max_used_stack_alignment; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..21b9f5c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -58,10 +58,11 @@ ;; H -- print a memory address offset by 8; used for sse high-parts ;; K -- print HLE lock prefix ;; Y -- print condition for XOP pcom* instruction. +;; v -- print segment override prefix ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. 
-;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode +;; ^ -- print addr32 prefix if Pmode != word_mode ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required. (define_c_enum "unspec" [ @@ -79,6 +80,7 @@ UNSPEC_MACHOPIC_OFFSET UNSPEC_PCREL UNSPEC_SIZEOF + UNSPEC_SECREL32 ;; Prologue support UNSPEC_STACK_ALLOC @@ -579,12 +581,11 @@ (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, - avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, - noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, - noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, - avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, - avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl,noapx_nf,avx10_2" + avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, + avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, + avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl, + avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. @@ -954,12 +955,8 @@ (eq_attr "isa" "fma_or_avx512vl") (symbol_ref "TARGET_FMA || TARGET_AVX512VL") (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") - (eq_attr "isa" "avx512f_512") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512") (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW") - (eq_attr "isa" "avx512bw_512") - (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512") (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ") (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") @@ -1495,7 +1492,7 @@ [(reg:CC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 3)) (pc)))] - "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256" + "TARGET_AVX512F && !TARGET_PREFER_AVX256" { ix86_expand_branch (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); @@ -1602,6 +1599,20 @@ [(set_attr "type" "icmp") (set_attr "mode" "<MODE>")]) +(define_insn "*cmp<mode>_plus_1" + [(set (reg FLAGS_REG) + (compare + (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m") + (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" +{ + operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode); + return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "icmp") + (set_attr "mode" "<MODE>")]) + (define_insn "*cmpqi_ext<mode>_1" [(set (reg FLAGS_REG) (compare @@ -2374,7 +2385,7 @@ (define_expand "movxi" [(set (match_operand:XI 0 "nonimmediate_operand") (match_operand:XI 1 "general_operand"))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_expand_vector_move (XImode, operands); DONE;") (define_expand "movoi" @@ -2427,22 +2438,32 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) -(define_insn "*mov<mode>_and" +;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem" +;; otherwise. 
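The -Oz saving in the two patterns that follow comes from the sign-extended
imm8 forms of AND and OR versus the imm32 form of MOV.  A rough size
comparison for a 32-bit store, assuming AT&T syntax and a plain
base-register address (exact byte counts depend on the addressing mode):

	movl	$0, (%rax)	# C7 /0 id  -- 6 bytes
	andl	$0, (%rax)	# 83 /4 ib  -- 3 bytes
	movl	$-1, (%rax)	# C7 /0 id  -- 6 bytes
	orl	$-1, (%rax)	# 83 /1 ib  -- 3 bytes

The and/or forms are read-modify-write and clobber the flags, which is why
these insns are now define_insn_and_split: the short form is kept only when
optimizing aggressively for size and is split back to a plain mov otherwise.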
+(define_insn_and_split "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "and{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) -(define_insn "*mov<mode>_or" +;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem" +;; otherwise. +(define_insn_and_split "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) @@ -2450,7 +2471,7 @@ (define_insn "*movxi_internal_avx512f" [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m") (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && (register_operand (operands[0], XImode) || register_operand (operands[1], XImode))" { @@ -2947,6 +2968,7 @@ (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx + && operands[1] != constm1_rtx && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" @@ -4414,7 +4436,7 @@ (eq_attr "alternative" "11") (const_string "DI") (eq_attr "alternative" "5") - (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512") + (cond [(and (match_test "TARGET_AVX512F") (not (match_test "TARGET_PREFER_AVX256"))) (const_string "V16SF") (match_test "TARGET_AVX") @@ -5482,7 +5504,7 @@ (set_attr "memory" "none") (set (attr "enabled") (if_then_else (eq_attr "alternative" "2") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512 + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL && !TARGET_PREFER_AVX256") (const_string "*")))]) @@ -5704,7 +5726,7 @@ /* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN quietly, and it always round to even. - flag_unsafte_math_optimization is needed for psrld. + flag_unsafe_math_optimization is needed for psrld. If we don't expect qNaNs nor sNaNs and can assume rounding to nearest, we can expand the conversion inline as (fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */ @@ -8708,6 +8730,34 @@ (set (match_dup 1) (minus:SWI (match_dup 1) (match_dup 0)))])]) +;; Under APX NDD, 'sub reg, mem, reg' is valid. 
+;; New format for +;; mov reg0, mem1 +;; sub reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sub mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI 0 "general_reg_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWI 2 "memory_operand") + (match_dup 0))) + (set (match_dup 0) + (minus:SWI (match_dup 2) (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) (match_dup 0))) + (set (match_dup 2) + (minus:SWI (match_dup 2) (match_dup 0)))])]) + ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into ;; subl $1, %eax; jnc .Lxx; (define_peephole2 @@ -9155,6 +9205,118 @@ (match_dup 1)) (match_dup 0)))])]) +;; Under APX NDD, 'adc reg, mem, reg' is valid. +;; +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem1, reg0 +;; to +;; mov reg0, mem2 +;; adc mem1, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 1) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 1) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0)))])]) + +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; adc mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && 
peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))])]) + (define_peephole2 [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -9635,6 +9797,52 @@ [(match_dup 3) (const_int 0)])) (match_dup 0)))])]) +;; Under APX NDD, 'sbb reg, mem, reg' is valid. +;; +;; New format for +;; mov reg0, mem1 +;; sbb reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sbb mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand")) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (zero_extend:<DWI> + (match_dup 0))))) + (set (match_dup 0) + (minus:SWI48 + (minus:SWI48 + (match_dup 2) + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_dup 2)) + (plus:<DWI> (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (minus:SWI48 (minus:SWI48 (match_dup 2) + (match_op_dup 5 + [(match_dup 3) (const_int 0)])) + (match_dup 0)))])]) + (define_peephole2 [(set (match_operand:SWI48 6 "general_reg_operand") (match_operand:SWI48 7 "memory_operand")) @@ -21315,11 +21523,12 @@ (set_attr "mode" "SI")]) ; As bsr is undefined behavior on zero and for other input -; values it is in range 0 to 63, we can optimize away sign-extends. -(define_insn_and_split "*bsr_rex64_2" +; values it is in range 0 to 63, we can optimize away sign-extends +; or zero-extends. 
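As a rough illustration of the kind of source the patterns below help
(the function name is made up): for any nonzero input the SImode value
63 - clz fits in [0, 63], so widening it to DImode with either
sign_extend or zero_extend cannot change it and the extension can be
dropped, leaving a bare bsrq.

/* For x != 0, 63 - __builtin_clzll (x) is the index of the highest set
   bit and always lies in [0, 63], so the implicit widening of the int
   result to 64 bits is a no-op.  */
unsigned long long
highest_set_bit_index (unsigned long long x)
{
  return 63 - __builtin_clzll (x);
}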
+(define_insn_and_split "*bsr_rex64<u>_2" [(set (match_operand:DI 0 "register_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21341,9 +21550,9 @@ operands[3] = lowpart_subreg (SImode, operands[2], DImode); }) -(define_insn_and_split "*bsr_2" +(define_insn_and_split "*bsr<u>_2" [(set (match_operand:DI 0 "register_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) @@ -21420,7 +21629,7 @@ (minus:DI (match_operand:DI 2 "const_int_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21450,7 +21659,7 @@ [(set (match_operand:DI 0 "register_operand") (minus:DI (match_operand:DI 2 "const_int_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) @@ -22992,7 +23201,8 @@ (match_operand 3))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:P 4 "register_operand" "=D"))] "TARGET_64BIT" { if (!TARGET_X32) @@ -23009,7 +23219,7 @@ Use data16 prefix instead, which doesn't have this problem. */ fputs ("\tdata16", asm_out_file); output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands); if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) fputs (ASM_SHORT "0x6666\n", asm_out_file); else @@ -23033,14 +23243,15 @@ (match_operand 4))) (unspec:DI [(match_operand 1 "tls_symbolic_operand") (reg:DI SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:DI 5 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[3]) == CONST && GET_CODE (XEXP (operands[3], 0)) == UNSPEC && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands); output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); return "call\t{*%%rax|rax}"; @@ -23056,7 +23267,8 @@ (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)])] + UNSPEC_TLS_GD) + (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -23107,11 +23319,12 @@ (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "Bz")) (match_operand 2))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 3 "register_operand" "=D"))] "TARGET_64BIT" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands); if (TARGET_SUN_TLS) return "call\t%p1@plt"; if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) @@ -23127,14 +23340,15 @@ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") (match_operand:DI 2 "immediate_operand" "i"))) (match_operand 3))) - (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:DI 4 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[2]) == CONST && GET_CODE (XEXP (operands[2], 0)) == UNSPEC && XINT 
(XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands); output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); return "call\t{*%%rax|rax}"; @@ -23148,7 +23362,8 @@ (call:P (mem:QI (match_operand 1)) (const_int 0))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 2 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -25587,10 +25802,6 @@ (clobber (reg:CC FLAGS_REG))])] "" { - /* Can't use this for non-default address spaces. */ - if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))) - FAIL; - int piece_size = GET_MODE_SIZE (GET_MODE (operands[1])); /* If .md ever supports :P for Pmode, these can be directly @@ -25598,9 +25809,14 @@ operands[5] = plus_constant (Pmode, operands[0], piece_size); operands[6] = plus_constant (Pmode, operands[2], piece_size); - /* Can't use this if the user has appropriated esi or edi. */ + /* Can't use this if the user has appropriated esi or edi, + * or if we have the destination in the non-default address space, + * since string insns cannot override the destination segment. */ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) - && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) + && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])) + && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])) + || Pmode == word_mode)) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -25635,8 +25851,15 @@ (const_int 8)))] "TARGET_64BIT && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsq" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsq"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "DI")]) @@ -25651,8 +25874,15 @@ (plus:P (match_dup 3) (const_int 4)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movs{l|d}" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movs{l|d}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "SI")]) @@ -25667,8 +25897,15 @@ (plus:P (match_dup 3) (const_int 2)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsw" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsw"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) @@ -25683,8 +25920,15 @@ (plus:P (match_dup 3) (const_int 1)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsb" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsb"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set (attr "prefix_rex") @@ -25723,8 +25967,15 @@ (use (match_dup 5))] "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - 
"%^rep{%;} movsq" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsq"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25743,8 +25994,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movs{l|d}" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movs{l|d}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25761,8 +26019,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsb" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsb"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25844,7 +26109,8 @@ (unspec [(const_int 0)] UNSPEC_STOS)] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25858,7 +26124,8 @@ (const_int 4))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25872,7 +26139,8 @@ (const_int 2))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25886,7 +26154,8 @@ (const_int 1))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25922,7 +26191,8 @@ (use (match_dup 4))] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25940,7 +26210,8 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25957,7 +26228,8 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -26224,8 +26496,8 @@ (define_expand 
"mov<mode>cc" [(set (match_operand:SWIM 0 "register_operand") (if_then_else:SWIM (match_operand 1 "comparison_operator") - (match_operand:SWIM 2 "<general_operand>") - (match_operand:SWIM 3 "<general_operand>")))] + (match_operand:SWIM 2 "general_operand") + (match_operand:SWIM 3 "general_operand")))] "" "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") @@ -26592,8 +26864,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") @@ -27183,7 +27455,7 @@ (cond [(and (eq_attr "alternative" "0") (not (match_test "TARGET_OPT_AGU"))) (const_string "alu") - (match_operand:<MODE> 2 "const0_operand") + (match_operand 2 "const0_operand") (const_string "imov") ] (const_string "lea"))) @@ -27197,6 +27469,46 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) +(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "<nonmemory_operand>" "l<i>"))) + (clobber (mem:BLK (scratch)))] + "" +{ + if (get_attr_type (insn) == TYPE_IMOV) + return "mov{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + { + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}"; + } +} + [(set (attr "type") + (cond [(match_operand 2 "const0_operand") + (const_string "imov") + ] + (const_string "lea"))) + (set (attr "length_immediate") + (cond [(eq_attr "type" "imov") + (const_string "0") + ] + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +(define_peephole2 + [(parallel + [(set (match_operand:P 0 "register_operand") + (plus:P (match_dup 0) + (match_operand:P 1 "<nonmemory_operand>"))) + (clobber (mem:BLK (scratch)))])] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel + [(set (match_dup 0) + (plus:P (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])]) + (define_insn "@pro_epilogue_adjust_stack_sub_<mode>" [(set (match_operand:P 0 "register_operand" "=r") (minus:P (match_operand:P 1 "register_operand" "0") @@ -28144,6 +28456,41 @@ const0_rtx); }) +;; For APX NDD PLUS/MINUS/LOGIC +;; Like cmpelim optimized pattern. +;; Reduce an extra mov instruction like +;; decl (%rdi), %eax +;; mov %eax, (%rdi) +;; to +;; decl (%rdi) +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "memory_operand") + (match_operand:SWI 1 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_operand:SWI 3 "register_operand") (match_dup 2))]) + (set (match_dup 0) (match_dup 3))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[0]) + && ix86_match_ccmode (peep2_next_insn (0), + (GET_CODE (operands[2]) == PLUS + || GET_CODE (operands[2]) == MINUS) + ? 
CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[5] + = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[0]), operands[1]); + operands[6] + = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -28237,6 +28584,54 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movq (%rdi), %rax +;; xorq %rsi, %rax, %rdx +;; movb %rdx, (%rdi) +;; cmpb %rsi, %rax +;; jne +;; to +;; xorb %rsi, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_operand:SWI 4 "register_operand") + (xor:SWI (match_operand:SWI 3 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 4)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI 5 "register_operand") + (match_operand:SWI 6 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && (rtx_equal_p (operands[0], operands[5]) + ? rtx_equal_p (operands[2], operands[6]) + : rtx_equal_p (operands[2], operands[5]) + && rtx_equal_p (operands[0], operands[6])) + && peep2_reg_dead_p (3, operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode))" + [(parallel [(set (match_dup 7) (match_dup 9)) + (set (match_dup 1) (match_dup 8))])] +{ + operands[7] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + operands[2]); + operands[9] + = gen_rtx_COMPARE (GET_MODE (operands[7]), + copy_rtx (operands[8]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -28480,6 +28875,58 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movb (%rdi), %al +;; xorl %esi, %eax, %edx +;; movb %dl, (%rdi) +;; cmpb %sil, %al +;; jne +;; to +;; xorl %sil, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand") + (match_operand:SWI12 1 "memory_operand")) + (parallel [(set (match_operand:SI 4 "register_operand") + (xor:SI (match_operand:SI 3 "register_operand") + (match_operand:SI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_operand:SWI12 5 "register_operand")) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI12 6 "register_operand") + (match_operand:SWI12 7 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && REGNO (operands[5]) == REGNO (operands[4]) + && (rtx_equal_p (operands[0], operands[6]) + ? (REG_P (operands[2]) + ? 
REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7]) + : rtx_equal_p (operands[2], operands[7])) + : (rtx_equal_p (operands[0], operands[7]) + && REG_P (operands[2]) + && REGNO (operands[2]) == REGNO (operands[6]))) + && peep2_reg_dead_p (3, operands[5]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], SImode) + || any_QIreg_operand (operands[2], SImode))" + [(parallel [(set (match_dup 8) (match_dup 10)) + (set (match_dup 1) (match_dup 9))])] +{ + operands[8] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + gen_lowpart (<MODE>mode, operands[2])); + operands[10] + = gen_rtx_COMPARE (GET_MODE (operands[8]), + copy_rtx (operands[9]), + const0_rtx); +}) + ;; Attempt to optimize away memory stores of values the memory already ;; has. See PR79593. (define_peephole2 @@ -29082,6 +29529,23 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) +(define_expand "crc_rev<SWI124:mode>si4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SWI124 2 "nonimmediate_operand") + (match_operand:SI 3)] + "TARGET_CRC32" +{ + /* crc32 uses iSCSI polynomial */ + if (INTVAL (operands[3]) == 0x1EDC6F41) + emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2])); + else + expand_reversed_crc_table_based (operands[0], operands[1], operands[2], + operands[3], <SWI124:MODE>mode, + generate_reflecting_code_standard); + DONE; +}) + (define_insn "rdpmc" [(set (match_operand:DI 0 "register_operand" "=A") (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")] diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 27d34bd..c93c0b1 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -36,13 +36,6 @@ HOST_WIDE_INT ix86_isa_flags_explicit Variable HOST_WIDE_INT ix86_isa_flags2_explicit -; Indicate if AVX512 and AVX10.1 are explicitly set no. -Variable -int ix86_no_avx512_explicit = 0 - -Variable -int ix86_no_avx10_1_explicit = 0 - ; Additional target flags Variable int ix86_target_flags @@ -103,14 +96,6 @@ HOST_WIDE_INT x_ix86_isa_flags2_explicit TargetSave HOST_WIDE_INT x_ix86_isa_flags_explicit -;; which flags were passed by the user -TargetSave -HOST_WIDE_INT x_ix86_no_avx512_explicit - -;; which flags were passed by the user -TargetSave -HOST_WIDE_INT x_ix86_no_avx10_1_explicit - ;; whether -mtune was not specified TargetSave unsigned char tune_defaulted @@ -721,13 +706,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. msse4 -Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save +Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. -mno-sse4 -Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save -Do not support SSE4.1 and SSE4.2 built-in functions and code generation. - msse5 Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed) ;; Deprecated @@ -1355,38 +1336,24 @@ mapx-inline-asm-use-gpr32 Target Var(ix86_apx_inline_asm_use_gpr32) Init(0) Enable GPR32 in inline asm when APX_F enabled. 
-mevex512 -Target Mask(ISA2_EVEX512) Var(ix86_isa_flags2) Save Warn(%<-mevex512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support 512 bit vector built-in functions and code generation. - musermsr Target Mask(ISA2_USER_MSR) Var(ix86_isa_flags2) Save Support USER_MSR built-in functions and code generation. -mavx10.1-256 -Target Mask(ISA2_AVX10_1_256) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-256 built-in functions and code generation. - mavx10.1 -Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-512 built-in functions and code generation. - -mavx10.1-512 -Target Alias(mavx10.1) +Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-512 built-in functions and code generation. +and AVX10.1 built-in functions and code generation. mavx10.2 Target Mask(ISA2_AVX10_2) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -AVX10.1-512 and AVX10.2 built-in functions and code generation. +AVX10.1 and AVX10.2 built-in functions and code generation. mamx-avx512 Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512, -AVX10.2 and AMX-AVX512 built-in functions and code generation. +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, +AVX10.1, AVX10.2 and AMX-AVX512 built-in functions and code generation. 
mamx-tf32 Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index 0d5a5a1..cce524c 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -590,21 +590,12 @@ UrlSuffix(gcc/x86-Options.html#index-mapxf) mapx-inline-asm-use-gpr32 UrlSuffix(gcc/x86-Options.html#index-mapx-inline-asm-use-gpr32) -mevex512 -UrlSuffix(gcc/x86-Options.html#index-mevex512) - musermsr UrlSuffix(gcc/x86-Options.html#index-musermsr) -mavx10.1-256 -UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) - mavx10.1 UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) -mavx10.1-512 -UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) - mavx10.2 UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2) diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index c30a4e0..b195fe5 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -148,24 +148,14 @@ #include <avx10_2mediaintrin.h> -#include <avx10_2-512mediaintrin.h> - #include <avx10_2convertintrin.h> -#include <avx10_2-512convertintrin.h> - #include <avx10_2bf16intrin.h> -#include <avx10_2-512bf16intrin.h> - #include <avx10_2satcvtintrin.h> -#include <avx10_2-512satcvtintrin.h> - #include <avx10_2minmaxintrin.h> -#include <avx10_2-512minmaxintrin.h> - #include <avx10_2copyintrin.h> #include <movrsintrin.h> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c..1bd63b2 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -218,6 +218,7 @@ case UNSPEC_DTPOFF: case UNSPEC_GOTNTPOFF: case UNSPEC_NTPOFF: + case UNSPEC_SECREL32: return true; default: break; @@ -392,6 +393,23 @@ return false; }) +;; Return true if VALUE is a constant integer whose negation satisfies +;; x86_64_immediate_operand. +(define_predicate "x86_64_neg_const_int_operand" + (match_code "const_int") +{ + HOST_WIDE_INT val = -UINTVAL (op); + if (mode == DImode && trunc_int_for_mode (val, SImode) != val) + return false; + if (flag_cf_protection & CF_BRANCH) + { + unsigned HOST_WIDE_INT endbr = TARGET_64BIT ? 0xfa1e0ff3 : 0xfb1e0ff3; + if ((val & HOST_WIDE_INT_C (0xffffffff)) == endbr) + return false; + } + return true; +}) + ;; Return true if VALUE is a constant integer whose low and high words satisfy ;; x86_64_immediate_operand. (define_predicate "x86_64_hilo_int_operand" @@ -1267,12 +1285,19 @@ (match_operand 0 "vector_memory_operand") (match_code "const_vector"))) +; Return true when OP is register_operand, vector_memory_operand, +; const_vector zero or const_vector all ones. +(define_predicate "vector_or_0_or_1s_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "vector_memory_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + (define_predicate "bcst_mem_operand" (and (match_code "vec_duplicate") (and (match_test "TARGET_AVX512F") (ior (match_test "TARGET_AVX512VL") - (and (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64") - (match_test "TARGET_EVEX512")))) + (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") (match_test "GET_MODE (XEXP (op, 0)) == GET_MODE_INNER (GET_MODE (op))") @@ -1333,6 +1358,12 @@ (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate or zero or all ones. 
+(define_predicate "nonimm_or_0_or_1s_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676..252ba07 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -279,63 +279,63 @@ ;; All vector modes including V?TImode, used in move patterns. (define_mode_iterator VMOVE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX") V1TI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; All AVX-512{F,VL} vector modes without HF. Supposed TARGET_AVX512F baseline. (define_mode_iterator V48_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator V48_256_512_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL")]) ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline. 
(define_mode_iterator V48H_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline. (define_mode_iterator VI12_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) (define_mode_iterator VI12HFBF_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") - (V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") + V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) (define_mode_iterator VI1_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) ;; All vector modes (define_mode_iterator V - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) ;; All 128bit vector modes (define_mode_iterator V_128 @@ -352,54 +352,44 @@ ;; All 512bit vector modes (define_mode_iterator V_512 - [(V64QI "TARGET_EVEX512") (V32HI "TARGET_EVEX512") - (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512") - (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")]) + [V64QI V32HI V16SI V8DI + V16SF V8DF V32HF V32BF]) ;; All 256bit and 512bit vector modes (define_mode_iterator V_256_512 [V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI 
"TARGET_AVX512F && TARGET_EVEX512") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") - (V32BF "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V32HF "TARGET_AVX512F") (V32BF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) ;; All vector float modes (define_mode_iterator VF - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_mode_iterator VF1_VF2_AVX512DQ - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512DQ && TARGET_EVEX512") + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL") (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")]) -(define_mode_iterator VF1_VF2_AVX10_2 - [(V16SF "TARGET_AVX10_2") V8SF V4SF - (V8DF "TARGET_AVX10_2") V4DF V2DF]) - (define_mode_iterator VFH - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_mode_iterator VF_BHSD - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") (V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") @@ -408,12 +398,12 @@ ;; 128-, 256- and 512-bit float vector modes for bitwise operations (define_mode_iterator VFB - [(V32BF "TARGET_AVX512F && TARGET_EVEX512") + [(V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) ;; 128- and 256-bit float vector modes @@ -430,44 +420,39 @@ ;; All SFmode vector float modes (define_mode_iterator VF1 - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF]) + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF]) (define_mode_iterator VF1_AVX2 - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF]) + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF]) ;; 128- and 256-bit SF vector modes (define_mode_iterator VF1_128_256 [(V8SF "TARGET_AVX") V4SF]) (define_mode_iterator VF1_128_256VL - [(V8SF "TARGET_EVEX512") (V4SF "TARGET_AVX512VL")]) + [V8SF (V4SF "TARGET_AVX512VL")]) ;; All DFmode vector float modes (define_mode_iterator VF2 - [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) - -(define_mode_iterator 
VF2_AVX10_2 - [(V8DF "TARGET_AVX10_2") V4DF V2DF]) + [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; All DFmode & HFmode & BFmode vector float modes (define_mode_iterator VF2HB - [(V32BF "TARGET_AVX10_2") - (V16BF "TARGET_AVX10_2") - (V8BF "TARGET_AVX10_2") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") + (V8BF "TARGET_AVX10_2") (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; 128- and 256-bit DF vector modes (define_mode_iterator VF2_128_256 [(V4DF "TARGET_AVX") V2DF]) (define_mode_iterator VF2_512_256 - [(V8DF "TARGET_AVX512F && TARGET_EVEX512") V4DF]) + [(V8DF "TARGET_AVX512F") V4DF]) (define_mode_iterator VF2_512_256VL - [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")]) + [V8DF (V4DF "TARGET_AVX512VL")]) ;; All 128bit vector SF/DF modes (define_mode_iterator VF_128 @@ -484,116 +469,102 @@ ;; All 512bit vector float modes (define_mode_iterator VF_512 - [(V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) + [V16SF V8DF]) ;; All 512bit vector float modes for bitwise operations (define_mode_iterator VFB_512 - [(V32BF "TARGET_EVEX512") - (V32HF "TARGET_EVEX512") - (V16SF "TARGET_EVEX512") - (V8DF "TARGET_EVEX512")]) + [V32BF V32HF V16SF V8DF]) (define_mode_iterator V24F_128 [V4SF V8HF V8BF]) (define_mode_iterator VI48_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI1248_AVX512VLBW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") - (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) - -(define_mode_iterator VI1248_AVX10_2 - [(V64QI "TARGET_AVX10_2") V32QI V16QI - (V32HI "TARGET_AVX10_2") V16HI V8HI - (V16SI "TARGET_AVX10_2") V8SI V4SI - (V8DI "TARGET_AVX10_2") V4DI V2DI]) + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VF_AVX512VL - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VFH_AVX512VL - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + +(define_mode_iterator V48_AVX512VL_4 + [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + 
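+;; The 4-element and 2-element iterators above and below collect the
+;; vector modes whose AVX512 mask occupies only the low 4 or 2 bits of
+;; a QImode mask register; they are used by the mask load and compare
+;; patterns added further down that fold an explicit AND of the mask
+;; (the "*_and15" and "*_and3" patterns).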
+(define_mode_iterator VI48_AVX512VL_4 + [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) -(define_mode_iterator VFH_AVX10_2 - [(V32HF "TARGET_AVX10_2") V16HF V8HF - (V16SF "TARGET_AVX10_2") V8SF V4SF - (V8DF "TARGET_AVX10_2") V4DF V2DF]) +(define_mode_iterator V8_AVX512VL_2 + [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VF2_AVX512VL - [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VF1_AVX512VL - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) (define_mode_iterator VF1_AVX512BW - [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF]) - -(define_mode_iterator VF1_AVX10_2 - [(V16SF "TARGET_AVX10_2") V8SF V4SF]) + [(V16SF "TARGET_AVX512BW") (V8SF "TARGET_AVX2") V4SF]) (define_mode_iterator VHFBF - [(V32HF "TARGET_EVEX512") V16HF V8HF - (V32BF "TARGET_EVEX512") V16BF V8BF]) + [V32HF V16HF V8HF V32BF V16BF V8BF]) (define_mode_iterator VHFBF_256 [V16HF V16BF]) (define_mode_iterator VHFBF_128 [V8HF V8BF]) (define_mode_iterator VHF_AVX512VL - [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) (define_mode_iterator VHFBF_AVX512VL - [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) - -(define_mode_iterator VHF_AVX10_2 - [(V32HF "TARGET_AVX10_2") V16HF V8HF]) + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) -(define_mode_iterator VBF_AVX10_2 - [(V32BF "TARGET_AVX10_2") V16BF V8BF]) +(define_mode_iterator VBF + [V32BF V16BF V8BF]) ;; All vector integer modes (define_mode_iterator VI - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI + [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI]) ;; All vector integer and HF modes (define_mode_iterator VIHFBF - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V8SI "TARGET_AVX") V4SI - (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF]) + [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF]) (define_mode_iterator VI_AVX2 - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V64QI "TARGET_AVX512BW") (V32QI 
"TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI_AVX_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) ;; All QImode vector integer modes (define_mode_iterator VI1 @@ -611,56 +582,50 @@ (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")]) (define_mode_iterator VI8 - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) - -(define_mode_iterator VI8_AVX10_2 - [(V8DI "TARGET_AVX10_2") V4DI V2DI]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) (define_mode_iterator VI8_FVL - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")]) + [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI8_AVX512VL - [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI8_256_512 - [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")]) + [V8DI (V4DI "TARGET_AVX512VL")]) (define_mode_iterator VI1_AVX2 [(V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512 - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI]) (define_mode_iterator VI1_AVX512VNNI - [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI]) + [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512VNNIBW - [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") + [(V64QI "TARGET_AVX512BW || TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI12_256_512_AVX512VL - [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")]) + [V64QI (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL")]) (define_mode_iterator VI2_AVX2 [(V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX2_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX512F - [(V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX512VNNIBW - [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") + [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI") (V16HI "TARGET_AVX2") V8HI]) -(define_mode_iterator VI2_AVX10_2 - [(V32HI "TARGET_AVX10_2") V16HI V8HI]) - (define_mode_iterator VI4_AVX [(V8SI "TARGET_AVX") V4SI]) @@ -668,65 +633,64 @@ [(V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI4_AVX512F - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI]) + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI4_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI 
"TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) (define_mode_iterator VI4_AVX10_2 [(V16SI "TARGET_AVX10_2") V8SI V4SI]) (define_mode_iterator VI48_AVX512F_AVX512VL - [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512") + [V4SI V8SI (V16SI "TARGET_AVX512F") (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V8DI "TARGET_AVX512F")]) (define_mode_iterator VI2_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI]) (define_mode_iterator VI2HFBF_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512") - (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") (V32HF "TARGET_EVEX512") - (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") (V32BF "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI + (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") V32HF + (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") V32BF]) (define_mode_iterator VI2H_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512") - (V8SI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") - (V8DI "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI + (V8SI "TARGET_AVX512VL") V16SI V8DI]) (define_mode_iterator VI1_AVX512VL_F - [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F && TARGET_EVEX512")]) + [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")]) (define_mode_iterator VI8_AVX2_AVX512BW - [(V8DI "TARGET_AVX512BW && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX2 [(V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX2_AVX512F - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX_AVX512F - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")]) (define_mode_iterator VI4_128_8_256 [V4SI V4DI]) ;; All V8D* modes (define_mode_iterator V8FI - [(V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V8DF V8DI]) ;; All V16S* modes (define_mode_iterator V16FI - [(V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")]) + [V16SF V16SI]) ;; ??? We should probably use TImode instead. 
(define_mode_iterator VIMAX_AVX2_AVX512BW - [(V4TI "TARGET_AVX512BW && TARGET_EVEX512") (V2TI "TARGET_AVX2") V1TI]) + [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) ;; Suppose TARGET_AVX512BW as baseline (define_mode_iterator VIMAX_AVX512VL - [(V4TI "TARGET_EVEX512") (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) + [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) (define_mode_iterator VIMAX_AVX2 [(V2TI "TARGET_AVX2") V1TI]) @@ -736,17 +700,17 @@ (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI12_AVX2_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI24_AVX2 [(V16HI "TARGET_AVX2") V8HI (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI124_AVX2 [(V32QI "TARGET_AVX2") V16QI @@ -754,17 +718,17 @@ (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI248_AVX512VL - [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") + [V32HI V16SI V8DI (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI248_AVX512VLBW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") - (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI48_AVX2 [(V8SI "TARGET_AVX2") V4SI @@ -776,17 +740,16 @@ (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512BW && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16SI "TARGET_EVEX512") - (V8DI "TARGET_EVEX512")]) + [(V32HI "TARGET_AVX512BW") V16SI V8DI]) (define_mode_iterator VI248_AVX512BW_AVX512VL - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [(V32HI "TARGET_AVX512BW") + (V4DI "TARGET_AVX512VL") V16SI V8DI]) ;; Suppose TARGET_AVX512VL as baseline (define_mode_iterator VI248_AVX512BW_1 @@ -800,16 +763,16 @@ V4DI V2DI]) (define_mode_iterator VI48_AVX512F - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") V8SI V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI V2DI]) + [(V16SI "TARGET_AVX512F") V8SI V4SI + (V8DI "TARGET_AVX512F") V4DI V2DI]) (define_mode_iterator VI48_AVX_AVX512F - 
[(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) (define_mode_iterator VI12_AVX_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI]) (define_mode_iterator V48_128_256 [V4SF V2DF @@ -950,10 +913,10 @@ (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) (define_mode_iterator VI248_256 [V16HI V8SI V4DI]) (define_mode_iterator VI248_512 - [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V32HI V16SI V8DI]) (define_mode_iterator VI48_128 [V4SI V2DI]) (define_mode_iterator VI148_512 - [(V64QI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V64QI V16SI V8DI]) (define_mode_iterator VI148_256 [V32QI V8SI V4DI]) (define_mode_iterator VI148_128 [V16QI V4SI V2DI]) @@ -961,75 +924,62 @@ (define_mode_iterator VI124_256 [V32QI V16HI V8SI]) (define_mode_iterator VI124_256_AVX512F_AVX512BW [V32QI V16HI V8SI - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW") + (V16SI "TARGET_AVX512F")]) (define_mode_iterator VI48_256 [V8SI V4DI]) (define_mode_iterator VI48_512 - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V16SI V8DI]) (define_mode_iterator VI4_256_8_512 [V8SI V8DI]) (define_mode_iterator VI_AVX512BW - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512")]) + [V16SI V8DI + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) (define_mode_iterator VIHFBF_AVX512BW - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") - (V32BF "TARGET_AVX512BW && TARGET_EVEX512")]) + [V16SI V8DI + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW") + (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")]) ;; Int-float size matches (define_mode_iterator VI2F_256_512 - [V16HI (V32HI "TARGET_EVEX512") - V16HF (V32HF "TARGET_EVEX512") - V16BF (V32BF "TARGET_EVEX512")]) + [V16HI V32HI V16HF V32HF V16BF V32BF]) (define_mode_iterator VI4F_128 [V4SI V4SF]) (define_mode_iterator VI8F_128 [V2DI V2DF]) (define_mode_iterator VI4F_256 [V8SI V8SF]) (define_mode_iterator VI8F_256 [V4DI V4DF]) (define_mode_iterator VI4F_256_512 - [V8SI V8SF - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512")]) + [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")]) (define_mode_iterator VI48F_256_512 [V8SI V8SF - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) + (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_mode_iterator VF48H_AVX512VL - [(V8DF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V8DF V16SF (V8SF "TARGET_AVX512VL")]) (define_mode_iterator VF48_128 [V2DF V4SF]) 
(define_mode_iterator VI48F - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") - (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") + [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VI12_VI48F_AVX512VL - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") - (V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) + V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) (define_mode_iterator V8_128 [V8HI V8HF V8BF]) (define_mode_iterator V16_256 [V16HI V16HF V16BF]) (define_mode_iterator V32_512 - [(V32HI "TARGET_EVEX512") (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")]) + [V32HI V32HF V32BF]) ;; Mapping from float mode to required SSE level (define_mode_attr sse @@ -1441,7 +1391,7 @@ ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) (define_mode_iterator AVX512MODE2P - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) + [V16SI V16SF V8DF]) ;; Mapping for dbpsabbw modes (define_mode_attr dbpsadbwmode @@ -1639,6 +1589,44 @@ "&& 1" [(set (match_dup 0) (match_dup 1))]) +(define_insn_and_split "*<avx512>_load<mode>mask_and15" + [(set (match_operand:V48_AVX512VL_4 0 "register_operand" "=v") + (vec_merge:V48_AVX512VL_4 + (unspec:V48_AVX512VL_4 + [(match_operand:V48_AVX512VL_4 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:V48_AVX512VL_4 2 "nonimm_or_0_operand" "0C") + (and:QI + (match_operand:QI 3 "register_operand" "Yk") + (const_int 15))))] + "TARGET_AVX512F" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V48_AVX512VL_4 + (unspec:V48_AVX512VL_4 [(match_dup 1)] UNSPEC_MASKLOAD) + (match_dup 2) + (match_dup 3)))]) + +(define_insn_and_split "*<avx512>_load<mode>mask_and3" + [(set (match_operand:V8_AVX512VL_2 0 "register_operand" "=v") + (vec_merge:V8_AVX512VL_2 + (unspec:V8_AVX512VL_2 + [(match_operand:V8_AVX512VL_2 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:V8_AVX512VL_2 2 "nonimm_or_0_operand" "0C") + (and:QI + (match_operand:QI 3 "register_operand" "Yk") + (const_int 3))))] + "TARGET_AVX512F" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V8_AVX512VL_2 + (unspec:V8_AVX512VL_2 [(match_dup 1)] UNSPEC_MASKLOAD) + (match_dup 2) + (match_dup 3)))]) + (define_expand "<avx512>_load<mode>_mask" [(set (match_operand:VI12_AVX512VL 0 "register_operand") (vec_merge:VI12_AVX512VL @@ -2049,11 +2037,9 @@ (define_mode_iterator STORENT_MODE [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + (V8DI 
"TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_expand "storent<mode>" [(set (match_operand:STORENT_MODE 0 "memory_operand") @@ -2857,10 +2843,10 @@ }) (define_expand "div<mode>3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand") - (div:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand") - (match_operand:VBF_AVX10_2 2 "vector_operand")))] + [(set (match_operand:VBF 0 "register_operand") + (div:VBF + (match_operand:VBF 1 "register_operand") + (match_operand:VBF 2 "vector_operand")))] "TARGET_AVX10_2" { if (TARGET_RECIP_VEC_DIV @@ -3897,15 +3883,12 @@ (define_mode_iterator REDUC_PLUS_MODE [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512") + (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32QI "TARGET_AVX") (V16HI "TARGET_AVX") (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")]) (define_expand "reduc_plus_scal_<mode>" [(plus:REDUC_PLUS_MODE @@ -3948,13 +3931,11 @@ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512BW") + (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "reduc_<code>_scal_<mode>" [(smaxmin:REDUC_SMINMAX_MODE @@ -4063,10 +4044,8 @@ (define_mode_iterator REDUC_ANY_LOGIC_MODE [(V32QI "TARGET_AVX") (V16HI "TARGET_AVX") (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")]) (define_expand "reduc_<code>_scal_<mode>" [(any_logic:REDUC_ANY_LOGIC_MODE @@ -4410,7 +4389,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") - (match_operand:SI 3 "const_0_to_7_operand" "n")] + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) @@ -4428,7 +4407,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + (match_operand:SI 3 "<cmp_imm_predicate>")] UNSPEC_PCMP))) (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand") 
(unspec:<V48H_AVX512VL:avx512fmaskmode> @@ -4469,7 +4448,8 @@ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8 + && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -4480,6 +4460,70 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn "*<avx512>_cmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_ucmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_cmp<mode>3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v") + (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512vl_ucmpv2di3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimmediate_operand" "v") + (match_operand:V2DI 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4762,7 +4806,8 @@ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_7_operand")] UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split () + && GET_MODE_NUNITS (<MODE>mode) >= 8" "#" "&& 1" [(set (match_dup 0) @@ -4923,8 +4968,8 @@ (define_expand "vec_cmp<mode><avx512fmaskmodelower>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (match_operator:<avx512fmaskmode> 1 "" - [(match_operand:VBF_AVX10_2 2 "register_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))] + [(match_operand:VBF 2 "register_operand") + (match_operand:VBF 3 "nonimmediate_operand")]))] "TARGET_AVX10_2" { bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]), @@ -5142,7 
+5187,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_256_AVX2 0 "register_operand") (vec_merge:VI_256_AVX2 - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5155,7 +5200,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_128 0 "register_operand") (vec_merge:VI_128 - (match_operand:VI_128 1 "vector_operand") + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") (match_operand:VI_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE2" @@ -5168,7 +5213,7 @@ (define_expand "vcond_mask_v1tiv1ti" [(set (match_operand:V1TI 0 "register_operand") (vec_merge:V1TI - (match_operand:V1TI 1 "vector_operand") + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") (match_operand:V1TI 2 "nonimm_or_0_operand") (match_operand:V1TI 3 "register_operand")))] "TARGET_SSE2" @@ -5181,7 +5226,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_256 0 "register_operand") (vec_merge:VF_256 - (match_operand:VF_256 1 "nonimmediate_operand") + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") (match_operand:VF_256 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5194,7 +5239,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 - (match_operand:VF_128 1 "vector_operand") + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") (match_operand:VF_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE" @@ -5573,7 +5618,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") @@ -5630,7 +5675,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx_noavx512vl,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx_noavx512f,avx512vl,avx512f") (set_attr "addr" "*,gpr16,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") @@ -5703,7 +5748,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") @@ -5765,7 +5810,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -5811,15 +5856,10 @@ (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") - (V8BF "TARGET_AVX10_2") - (V16BF "TARGET_AVX10_2") - (V32BF "TARGET_AVX10_2")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (HF "TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32HF "TARGET_AVX512FP16") + 
(V8BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") (V32BF "TARGET_AVX10_2")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -5857,8 +5897,7 @@ (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) @@ -5928,14 +5967,12 @@ ;; Suppose AVX-512F as baseline (define_mode_iterator VFH_SF_AVX512VL - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (HF "TARGET_AVX512FP16") - SF (V16SF "TARGET_EVEX512") - (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - DF (V8DF "TARGET_EVEX512") - (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") @@ -8683,7 +8720,7 @@ (unspec:V16SI [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -8751,7 +8788,7 @@ (unspec:V16SI [(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTT_U))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttps2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -8761,7 +8798,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_fix:V16SI (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9349,7 +9386,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtdq2pd\t{%t1, %0|%0, %t1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9385,7 +9422,7 @@ (unspec:V8SI [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9544,7 +9581,7 @@ (unspec:V8SI [(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTT_U))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttpd2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9554,7 +9591,7 @@ [(set (match_operand:V8SI 0 "register_operand" "=v") (any_fix:V8SI (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" 
+ "TARGET_AVX512F" "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10070,7 +10107,7 @@ [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10232,7 +10269,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2pd\t{%t1, %0|%0, %t1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10438,7 +10475,7 @@ (set (match_operand:V8DF 0 "register_operand") (float_extend:V8DF (match_dup 2)))] -"TARGET_AVX512F && TARGET_EVEX512" +"TARGET_AVX512F" "operands[2] = gen_reg_rtx (V8SFmode);") (define_expand "vec_unpacks_lo_v4sf" @@ -10576,7 +10613,7 @@ (set (match_operand:V8DF 0 "register_operand") (float:V8DF (match_dup 2)))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "operands[2] = gen_reg_rtx (V8SImode);") (define_expand "vec_unpacks_float_lo_v16si" @@ -10588,7 +10625,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "vec_unpacku_float_hi_v4si" [(set (match_dup 5) @@ -10684,7 +10721,7 @@ (define_expand "vec_unpacku_float_hi_v16si" [(match_operand:V8DF 0 "register_operand") (match_operand:V16SI 1 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { REAL_VALUE_TYPE TWO32r; rtx k, x, tmp[4]; @@ -10733,7 +10770,7 @@ (define_expand "vec_unpacku_float_lo_v16si" [(match_operand:V8DF 0 "register_operand") (match_operand:V16SI 1 "nonimmediate_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { REAL_VALUE_TYPE TWO32r; rtx k, x, tmp[3]; @@ -10827,7 +10864,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V8DF 1 "nonimmediate_operand") (match_operand:V8DF 2 "nonimmediate_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx r1, r2; @@ -10942,7 +10979,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V8DF 1 "nonimmediate_operand") (match_operand:V8DF 2 "nonimmediate_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx r1, r2; @@ -11135,7 +11172,7 @@ (const_int 11) (const_int 27) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -11223,7 +11260,7 @@ (const_int 9) (const_int 25) (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -11363,7 +11400,7 @@ (const_int 11) (const_int 11) (const_int 13) (const_int 13) (const_int 15) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -11416,7 +11453,7 @@ (const_int 10) (const_int 10) (const_int 12) (const_int 12) (const_int 14) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + 
"TARGET_AVX512F" "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -12376,9 +12413,7 @@ (V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")]) (define_mode_iterator AVX512_VEC - [(V8DF "TARGET_AVX512DQ && TARGET_EVEX512") - (V8DI "TARGET_AVX512DQ && TARGET_EVEX512") - (V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")]) + [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI]) (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask" [(match_operand:<ssequartermode> 0 "nonimmediate_operand") @@ -12547,9 +12582,7 @@ [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")]) (define_mode_iterator AVX512_VEC_2 - [(V16SF "TARGET_AVX512DQ && TARGET_EVEX512") - (V16SI "TARGET_AVX512DQ && TARGET_EVEX512") - (V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI]) (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask" [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") @@ -13110,7 +13143,7 @@ (const_int 26) (const_int 27) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { if (TARGET_AVX512VL @@ -13159,7 +13192,7 @@ (const_int 58) (const_int 59) (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog1") (set_attr "length_immediate" "1") @@ -13257,15 +13290,15 @@ ;; Modes handled by vec_extract patterns. (define_mode_iterator VEC_EXTRACT_MODE - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) (define_expand "vec_extract<mode><ssescalarmodelower>" [(match_operand:<ssescalarmode> 0 "register_operand") @@ -13307,7 +13340,7 @@ (const_int 3) (const_int 11) (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -13421,9 +13454,9 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -13437,7 
+13470,7 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -13454,7 +13487,7 @@ (const_int 2) (const_int 6)])))] "TARGET_AVX && <mask_avx512vl_condition>" "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "V4DF")]) @@ -13649,7 +13682,7 @@ (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_VTERNLOG))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) /* Disallow embeded broadcast for vector HFmode since it's not real AVX512FP16 instruction. */ && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4 @@ -13731,7 +13764,7 @@ [(set (match_operand:V 0 "register_operand") (match_operand:V 1 "ternlog_operand"))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split ()" "#" "&& 1" @@ -13761,7 +13794,7 @@ (match_operand:V 3 "regmem_or_bitnot_regmem_operand") (match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -13846,7 +13879,7 @@ (match_operand:V 3 "regmem_or_bitnot_regmem_operand")) (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -13930,7 +13963,7 @@ (match_operand:V 2 "regmem_or_bitnot_regmem_operand")) (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split ()" "#" "&& 1" @@ -14080,7 +14113,7 @@ (match_operand:SI 3 "const_0_to_255_operand") (match_operand:V16SF 4 "register_operand") (match_operand:HI 5 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[3]); emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2], @@ -14267,7 +14300,7 @@ (match_operand 16 "const_12_to_15_operand") (match_operand 17 "const_28_to_31_operand") (match_operand 18 "const_28_to_31_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) @@ -14302,7 +14335,7 @@ (match_operand:SI 3 "const_0_to_255_operand") (match_operand:V8DF 4 "register_operand") (match_operand:QI 5 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[3]); emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2], @@ -14332,7 +14365,7 @@ (match_operand 8 "const_12_to_13_operand") (match_operand 9 "const_6_to_7_operand") (match_operand 10 "const_14_to_15_operand")])))] - 
"TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask; mask = INTVAL (operands[3]); @@ -14464,7 +14497,7 @@ (const_int 3) (const_int 11) (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -14514,7 +14547,7 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -14880,7 +14913,7 @@ (set_attr "mode" "V2DF,DF,V8DF") (set (attr "enabled") (cond [(eq_attr "alternative" "2") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512 + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL && !TARGET_PREFER_AVX256") (match_test "<mask_avx512vl_condition>") (const_string "*") @@ -14965,13 +14998,13 @@ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand") (truncate:PMOV_DST_MODE_1 (match_operand:<pmov_src_mode> 1 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "*avx512f_<code><pmov_src_lower><mode>2" [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_1 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -14993,7 +15026,7 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] - "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512BW && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15018,7 +15051,7 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] - "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512BW && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15102,7 +15135,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15118,7 +15151,7 @@ (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")) (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15132,19 +15165,19 @@ (match_operand:<pmov_src_mode> 1 "register_operand")) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "truncv32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand") (truncate:V32QI (match_operand:V32HI 1 "register_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_insn "avx512bw_<code>v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI (match_operand:V32HI 1 "register_operand" "v,v")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15174,7 +15207,7 @@ 
(const_int 26) (const_int 27) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512VBMI && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512VBMI && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15190,7 +15223,7 @@ (match_operand:V32HI 1 "register_operand" "v,v")) (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0") (match_operand:SI 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15204,7 +15237,7 @@ (match_operand:V32HI 1 "register_operand")) (match_dup 0) (match_operand:SI 2 "register_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_mode_iterator PMOV_DST_MODE_2 [V4SI V8HI (V16QI "TARGET_AVX512BW")]) @@ -16062,7 +16095,7 @@ [(set (match_operand:V8QI 0 "register_operand") (truncate:V8QI (match_operand:V8DI 1 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx op0 = gen_reg_rtx (V16QImode); @@ -16082,7 +16115,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16092,7 +16125,7 @@ [(set (match_operand:V8QI 0 "memory_operand" "=m") (any_truncate:V8QI (match_operand:V8DI 1 "register_operand" "v")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") @@ -16104,7 +16137,7 @@ (subreg:DI (any_truncate:V8QI (match_operand:V8DI 1 "register_operand")) 0))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -16128,7 +16161,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16149,7 +16182,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16162,7 +16195,7 @@ (match_operand:V8DI 1 "register_operand" "v")) (match_dup 0) (match_operand:QI 2 "register_operand" "Yk")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") @@ -16174,7 +16207,7 @@ (any_truncate:V8QI (match_operand:V8DI 1 "register_operand")) (match_operand:QI 2 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { operands[0] = adjust_address_nv (operands[0], V8QImode, 0); emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0], @@ -16431,7 +16464,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") (define_insn "*vec_widen_umult_even_v16si<mask_name>" @@ -16451,7 +16484,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[1]) 
&& MEM_P (operands[2]))" "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") @@ -16547,7 +16580,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") (define_insn "*vec_widen_smult_even_v16si<mask_name>" @@ -16567,7 +16600,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") @@ -16969,7 +17002,7 @@ "TARGET_SSE2" { /* Try with vnni instructions. */ - if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI && TARGET_EVEX512) + if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI) || (<MODE_SIZE> < 64 && ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI))) { @@ -17062,7 +17095,7 @@ (match_operand:V64QI 1 "register_operand") (match_operand:V64QI 2 "nonimmediate_operand") (match_operand:V16SI 3 "nonimmediate_operand")] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" { rtx t1 = gen_reg_rtx (V8DImode); rtx t2 = gen_reg_rtx (V16SImode); @@ -18300,13 +18333,10 @@ (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") (V16HF "TARGET_AVX512FP16") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") + (V32HF "TARGET_AVX512FP16")]) (define_expand "vec_perm<mode>" [(match_operand:VEC_PERM_AVX2 0 "register_operand") @@ -18333,7 +18363,7 @@ { operands[2] = CONSTM1_RTX (<MODE>mode); - if (!TARGET_AVX512F || (!TARGET_AVX512VL && !TARGET_EVEX512)) + if (!TARGET_AVX512F) operands[2] = force_reg (<MODE>mode, operands[2]); }) @@ -18342,7 +18372,6 @@ (xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br") (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))] "TARGET_AVX512F - && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512) && (!<mask_applied> || <ssescalarmode>mode == SImode || <ssescalarmode>mode == DImode)" @@ -18409,7 +18438,7 @@ (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC"))) (unspec [(match_operand:VI 3 "register_operand" "0,0,0")] UNSPEC_INSN_FALSE_DEP)] - "TARGET_AVX512F && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)" + "TARGET_AVX512F" { if (TARGET_AVX512VL) return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}"; @@ -18433,7 +18462,7 @@ (not:<ssescalarmode> (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 0) (xor:VI48_AVX512F (vec_duplicate:VI48_AVX512F (match_dup 1)) @@ -18587,8 +18616,7 @@ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") (eq_attr "alternative" "4") (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 - && !TARGET_PREFER_AVX256)") + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") ] 
(const_string "*")))]) @@ -18632,7 +18660,7 @@ (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) (match_operand:VI 2 "vector_operand")))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 3) (vec_duplicate:VI (match_dup 1))) (set (match_dup 0) @@ -18647,7 +18675,7 @@ (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) (match_operand:VI 2 "vector_operand")))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 3) (vec_duplicate:VI (match_dup 1))) (set (match_dup 0) @@ -18941,7 +18969,7 @@ (match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr")) (match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -18974,7 +19002,7 @@ (match_operand:VI 1 "bcst_vector_operand" "%0, 0") (match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -19005,7 +19033,7 @@ (not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0")) (not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -19027,7 +19055,7 @@ (const_string "*")))]) (define_mode_iterator AVX512ZEXTMASK - [(DI "TARGET_AVX512BW && TARGET_EVEX512") (SI "TARGET_AVX512BW") HI]) + [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") @@ -19276,7 +19304,7 @@ (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "<mask_prefix>") @@ -19345,7 +19373,7 @@ (const_int 14) (const_int 15) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "<mask_prefix>") @@ -19407,7 +19435,7 @@ (const_int 61) (const_int 125) (const_int 62) (const_int 126) (const_int 63) (const_int 127)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19503,7 +19531,7 @@ (const_int 53) (const_int 117) (const_int 54) (const_int 118) (const_int 55) (const_int 119)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19727,7 +19755,7 @@ (const_int 11) (const_int 27) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_AVX512F 
&& TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19782,7 +19810,7 @@ (const_int 9) (const_int 25) (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -20488,7 +20516,7 @@ (match_operand:SI 2 "const_0_to_255_operand") (match_operand:V16SI 3 "register_operand") (match_operand:HI 4 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[2]); emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1], @@ -20532,7 +20560,7 @@ (match_operand 15 "const_12_to_15_operand") (match_operand 16 "const_12_to_15_operand") (match_operand 17 "const_12_to_15_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) @@ -20698,7 +20726,7 @@ [(match_operand:V32HI 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_PSHUFLW))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -20874,7 +20902,7 @@ [(match_operand:V32HI 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_PSHUFHW))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -21408,7 +21436,7 @@ (match_operand:V4TI 1 "register_operand" "v") (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vextracti32x4\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "length_immediate" "1") @@ -21416,7 +21444,7 @@ (set_attr "mode" "XI")]) (define_mode_iterator VEXTRACTI128_MODE - [(V4TI "TARGET_AVX512F && TARGET_EVEX512") V2TI]) + [(V4TI "TARGET_AVX512F") V2TI]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand") @@ -21439,7 +21467,7 @@ && VECTOR_MODE_P (GET_MODE (operands[1])) && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16) || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32) - || (TARGET_AVX512F && TARGET_EVEX512 + || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64)) && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))" [(set (match_dup 0) (vec_select:SWI48x (match_dup 1) @@ -22814,7 +22842,7 @@ (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1))))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") (set_attr "prefix" "evex") @@ -23328,10 +23356,10 @@ ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI ;; modes for abs instruction on pre AVX-512 targets. 
(define_mode_iterator VI1248_AVX512VL_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX512VL") + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_insn "*abs<mode>2" @@ -24159,7 +24187,7 @@ [(set (match_operand:V32HI 0 "register_operand" "=v") (any_extend:V32HI (match_operand:V32QI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24173,7 +24201,7 @@ (match_operand:V64QI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))] @@ -24193,7 +24221,7 @@ (match_operand:V64QI 3 "const0_operand")) (match_parallel 4 "pmovzx_parallel" [(match_operand 5 "const_int_operand")])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))] @@ -24206,7 +24234,7 @@ [(set (match_operand:V32HI 0 "register_operand") (any_extend:V32HI (match_operand:V32QI 1 "nonimmediate_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") @@ -24354,7 +24382,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24364,7 +24392,7 @@ [(set (match_operand:V16SI 0 "register_operand") (any_extend:V16SI (match_operand:V16QI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v8qiv8si2<mask_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") @@ -24497,7 +24525,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16HI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24507,7 +24535,7 @@ [(set (match_operand:V16SI 0 "register_operand") (any_extend:V16SI (match_operand:V16HI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1" [(set (match_operand:V32HI 0 "register_operand" "=v") @@ -24517,7 +24545,7 @@ (match_operand:V32HI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))] @@ -24741,7 +24769,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" 
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24751,7 +24779,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8QI 1 "memory_operand" "m")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24769,7 +24797,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -24780,7 +24808,7 @@ [(set (match_operand:V8DI 0 "register_operand") (any_extend:V8DI (match_operand:V8QI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { if (!MEM_P (operands[1])) { @@ -24922,7 +24950,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8HI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24932,7 +24960,7 @@ [(set (match_operand:V8DI 0 "register_operand") (any_extend:V8DI (match_operand:V8HI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v4hiv4di2<mask_name>" [(set (match_operand:V4DI 0 "register_operand" "=v") @@ -25059,7 +25087,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -25073,7 +25101,7 @@ (match_operand:V16SI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))] @@ -25092,7 +25120,7 @@ (match_operand:V16SI 3 "const0_operand")) (match_parallel 4 "pmovzx_parallel" [(match_operand 5 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))] @@ -25104,7 +25132,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v4siv4di2<mask_name>" [(set (match_operand:V4DI 0 "register_operand" "=v") @@ -25505,7 +25533,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V16SF 1 "nonimmediate_operand") (match_operand:SI 2 "const_0_to_15_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx tmp = gen_reg_rtx (V16SFmode); emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2])); @@ -26723,7 +26751,7 @@ (ashiftrt:V8DI (match_operand:V8DI 1 "register_operand") (match_operand:V8DI 2 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "vashrv4di3" [(set (match_operand:V4DI 0 "register_operand") @@ -26814,7 +26842,7 @@ [(set (match_operand:V16SI 0 "register_operand") (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand") (match_operand:V16SI 2 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") 
+ "TARGET_AVX512F") (define_expand "vashrv8si3" [(set (match_operand:V8SI 0 "register_operand") @@ -27257,12 +27285,12 @@ (set_attr "mode" "OI")]) (define_mode_attr pbroadcast_evex_isa - [(V64QI "avx512bw_512") (V32QI "avx512bw") (V16QI "avx512bw") - (V32HI "avx512bw_512") (V16HI "avx512bw") (V8HI "avx512bw") - (V16SI "avx512f_512") (V8SI "avx512f") (V4SI "avx512f") - (V8DI "avx512f_512") (V4DI "avx512f") (V2DI "avx512f") - (V32HF "avx512bw_512") (V16HF "avx512bw") (V8HF "avx512bw") - (V32BF "avx512bw_512") (V16BF "avx512bw") (V8BF "avx512bw")]) + [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw") + (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw") + (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f") + (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f") + (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw") + (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")]) (define_insn "avx2_pbroadcast<mode>" [(set (match_operand:VIHFBF 0 "register_operand" "=x,v") @@ -27806,7 +27834,7 @@ (set (attr "enabled") (if_then_else (eq_attr "alternative" "1") (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL - && TARGET_EVEX512 && !TARGET_PREFER_AVX256") + && !TARGET_PREFER_AVX256") (const_string "*")))]) (define_insn "*vec_dupv4si" @@ -27834,7 +27862,7 @@ (set (attr "enabled") (if_then_else (eq_attr "alternative" "1") (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL - && TARGET_EVEX512 && !TARGET_PREFER_AVX256") + && !TARGET_PREFER_AVX256") (const_string "*")))]) (define_insn "*vec_dupv2di" @@ -27849,7 +27877,7 @@ %vmovddup\t{%1, %0|%0, %1} movlhps\t%0, %0" [(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx") - (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov") + (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov") (set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig") (set (attr "mode") (cond [(and (eq_attr "alternative" "2") @@ -27865,8 +27893,7 @@ (if_then_else (eq_attr "alternative" "2") (symbol_ref "TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 - && !TARGET_PREFER_AVX256)") + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") (const_string "*")))]) (define_insn "avx2_vbroadcasti128_<mode>" @@ -27946,7 +27973,7 @@ [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") - (set_attr "isa" "avx2,noavx2,avx2,avx512f_512,noavx2") + (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2") (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")]) (define_split @@ -28010,8 +28037,8 @@ ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. 
(define_mode_iterator VI4F_BRCST32x2 - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL")]) (define_mode_attr 64x2mode [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")]) @@ -28061,8 +28088,7 @@ ;; For broadcast[i|f]64x2 (define_mode_iterator VI8F_BRCST64x2 - [(V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) + [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v") @@ -28118,27 +28144,26 @@ (set_attr "mode" "<sseinsnmode>")]) (define_mode_iterator VPERMI2 - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") - (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") + [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) (define_mode_iterator VPERMI2I - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") + [V16SI V8DI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) @@ -28813,29 +28838,28 @@ ;; Modes handled by vec_init expanders. (define_mode_iterator VEC_INIT_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) ;; Likewise, but for initialization from half sized vectors. ;; Thus, these are all VEC_INIT_MODE modes except V2??. 
(define_mode_iterator VEC_INIT_HALF_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") - (V4TI "TARGET_AVX512F && TARGET_EVEX512")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") + (V4TI "TARGET_AVX512F")]) (define_expand "vec_init<mode><ssescalarmodelower>" [(match_operand:VEC_INIT_MODE 0 "register_operand") @@ -29096,7 +29120,7 @@ (unspec:V16SF [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTPH2PS))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -29186,7 +29210,7 @@ UNSPEC_VCVTPS2PH) (match_operand:V16HI 3 "nonimm_or_0_operand") (match_operand:HI 4 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int round = INTVAL (operands[2]); /* Separate {sae} from rounding control imm, @@ -29205,7 +29229,7 @@ [(match_operand:V16SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_VCVTPS2PH))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -29217,7 +29241,7 @@ [(match_operand:V16SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_VCVTPS2PH))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -30196,7 +30220,7 @@ (match_operand:V8DI 2 "register_operand" "v") (match_operand:V8DI 3 "nonimmediate_operand" "vm")] VPMADD52))] - "TARGET_AVX512IFMA && TARGET_EVEX512" + "TARGET_AVX512IFMA" "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "prefix" "evex") @@ -30567,7 +30591,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPBUSD))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpbusd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30636,7 +30660,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPBUSDS))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpbusds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30705,7 +30729,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPWSSD))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + 
"TARGET_AVX512VNNI" "vpdpwssd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30774,7 +30798,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPWSSDS))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpwssds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30930,8 +30954,7 @@ (set_attr "mode" "<sseinsnmode>")]) (define_mode_iterator VI48_AVX512VP2VL - [(V8DI "TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) (define_mode_iterator MASK_DWI [P2QI P2HI]) @@ -30973,12 +30996,12 @@ (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v") (match_operand:V16SI 2 "vector_operand" "vm")] UNSPEC_VP2INTERSECT))] - "TARGET_AVX512VP2INTERSECT && TARGET_EVEX512" + "TARGET_AVX512VP2INTERSECT" "vp2intersectd\t{%2, %1, %0|%0, %1, %2}" [(set_attr ("prefix") ("evex"))]) (define_mode_iterator VF_AVX512BF16VL - [(V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) + [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) ;; Converting from BF to SF (define_mode_attr bf16_cvt_2sf [(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")]) @@ -31098,7 +31121,7 @@ "vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}") (define_mode_iterator VF1_AVX512_256 - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL")]) (define_expand "avx512f_cvtneps2bf16_<mode>_maskz" [(match_operand:<sf_cvt_bf16> 0 "register_operand") @@ -31144,7 +31167,7 @@ [(set (match_operand:V16BF 0 "register_operand") (float_truncate:V16BF (match_operand:V16SF 1 "nonimmediate_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512 + "TARGET_AVX512BW && !HONOR_NANS (BFmode) && !flag_rounding_math && (flag_unsafe_math_optimizations || TARGET_AVX512BF16)" { @@ -31428,10 +31451,10 @@ ;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15 (define_mode_iterator INT_BROADCAST_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512 && TARGET_64BIT") + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F && TARGET_64BIT") (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")]) ;; Broadcast from an integer. 
NB: Enable broadcast only if we can move @@ -31705,8 +31728,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>" - [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v") - (vec_concat:VHF_AVX10_2 + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (vec_concat:VHF_AVX512VL (float_truncate:<ssehalfvecmode> (match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>")) (float_truncate:<ssehalfvecmode> @@ -31730,8 +31753,8 @@ (define_insn "vcvt<convertfp8_pack><mode><mask_name>" [(set (match_operand:<ssebvecmode> 0 "register_operand" "=v") (unspec:<ssebvecmode> - [(match_operand:VHF_AVX10_2 1 "register_operand" "v") - (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")] + [(match_operand:VHF_AVX512VL 1 "register_operand" "v") + (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_CONVERTFP8_PACK))] "TARGET_AVX10_2" "vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand2>, %1, %2}" @@ -31814,7 +31837,7 @@ [(set_attr "prefix" "evex")]) (define_mode_iterator VHF_AVX10_2_2 - [(V32HF "TARGET_AVX10_2") V16HF]) + [V32HF V16HF]) (define_insn "vcvt<biasph2fp8_pack><mode><mask_name>" [(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v") @@ -31911,8 +31934,8 @@ [(set_attr "prefix" "evex")]) (define_insn "vcvthf82ph<mode><mask_name>" - [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v") - (unspec:VHF_AVX10_2 + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (unspec:VHF_AVX512VL [(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")] UNSPEC_VCVTHF82PH))] "TARGET_AVX10_2" @@ -31934,8 +31957,8 @@ (define_expand "usdot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX10_2 1 "register_operand") - (match_operand:VI2_AVX10_2 2 "register_operand") + (match_operand:VI2_AVX512F 1 "register_operand") + (match_operand:VI2_AVX512F 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { @@ -31952,8 +31975,8 @@ (define_expand "udot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX10_2 1 "register_operand") - (match_operand:VI2_AVX10_2 2 "register_operand") + (match_operand:VI2_AVX512F 1 "register_operand") + (match_operand:VI2_AVX512F 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { @@ -32032,23 +32055,23 @@ [(set_attr "prefix" "evex")]) (define_insn "vdpphps_<mode>" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS))] "TARGET_AVX10_2" "vdpphps\t{%3, %2, %0|%0, %2, %3}" [(set_attr "prefix" "evex")]) (define_insn "vdpphps_<mode>_mask" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (vec_merge:VF1_AVX10_2 - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 
0 "register_operand" "=v") + (vec_merge:VF1_AVX512VL + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] @@ -32057,10 +32080,10 @@ [(set_attr "prefix" "evex")]) (define_expand "vdpphps_<mode>_maskz" - [(match_operand:VF1_AVX10_2 0 "register_operand") - (match_operand:VF1_AVX10_2 1 "register_operand") - (match_operand:VF1_AVX10_2 2 "register_operand") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VF1_AVX512VL 0 "register_operand") + (match_operand:VF1_AVX512VL 1 "register_operand") + (match_operand:VF1_AVX512VL 2 "register_operand") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32070,60 +32093,60 @@ }) (define_insn "vdpphps_<mode>_maskz_1" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (vec_merge:VF1_AVX10_2 - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") + (vec_merge:VF1_AVX512VL + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS) - (match_operand:VF1_AVX10_2 4 "const0_operand" "C") + (match_operand:VF1_AVX512VL 4 "const0_operand" "C") (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] "TARGET_AVX10_2" "vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_scalefbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")] UNSPEC_VSCALEFBF16))] "TARGET_AVX10_2" "vscalefbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) (define_expand "<code><mode>3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand") - (smaxmin:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))] + [(set (match_operand:VBF 0 "register_operand") + (smaxmin:VBF + (match_operand:VBF 1 "register_operand") + (match_operand:VBF 2 "nonimmediate_operand")))] "TARGET_AVX10_2") (define_insn "avx10_2_<code>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (smaxmin:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (smaxmin:VBF + (match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) (define_insn "avx10_2_<insn>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (plusminusmultdiv:VBF_AVX10_2 - 
(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (plusminusmultdiv:VBF + (match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) (define_expand "avx10_2_fmaddbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32135,11 +32158,11 @@ }) (define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2" "@ vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32150,12 +32173,12 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmaddbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32167,12 +32190,12 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmaddbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (match_operand:VBF 3 "nonimmediate_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32182,10 +32205,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fnmaddbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 
"nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32197,12 +32220,12 @@ }) (define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2" "@ vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32213,13 +32236,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmaddbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32231,13 +32254,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmaddbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (match_operand:VBF 3 "nonimmediate_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32247,10 +32270,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fmsubbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32262,12 +32285,12 @@ }) (define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF 2 "nonimmediate_operand" 
"vm,v,vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2" "@ vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32278,13 +32301,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmsubbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32296,13 +32319,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmsubbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32312,10 +32335,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fnmsubbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32327,13 +32350,13 @@ }) (define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2" "@ vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32344,14 +32367,14 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmsubbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + 
(vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32363,14 +32386,14 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmsubbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32380,35 +32403,35 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_rsqrtbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT))] "TARGET_AVX10_2" "vrsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_sqrtbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (sqrt:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (sqrt:VBF + (match_operand:VBF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "vsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_rcpbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_RCP))] "TARGET_AVX10_2" "vrcpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_getexpbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_GETEXP))] "TARGET_AVX10_2" "vgetexpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" @@ -32425,9 +32448,9 @@ (UNSPEC_VGETMANTBF16 "getmant")]) (define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm") + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] BF16IMMOP))] "TARGET_AVX10_2" @@ -32437,7 +32460,7 @@ (define_insn "avx10_2_fpclassbf16_<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VBF_AVX10_2 1 
"nonimmediate_operand" "vm") + [(match_operand:VBF 1 "nonimmediate_operand" "vm") (match_operand 2 "const_0_to_255_operand")] UNSPEC_VFPCLASSBF16))] "TARGET_AVX10_2" @@ -32447,8 +32470,8 @@ (define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") (match_operand 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP))] "TARGET_AVX10_2" @@ -32486,7 +32509,7 @@ (define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs<mode><mask_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")] + [(match_operand:VBF 1 "vector_operand" "vm")] UNSPEC_CVT_BF16_IBS_ITER))] "TARGET_AVX10_2" "vcvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" @@ -32501,7 +32524,7 @@ (define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + [(match_operand:VHF_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_CVT_PH_IBS_ITER))] "TARGET_AVX10_2 && <round_mode512bit_condition>" "vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" @@ -32516,7 +32539,7 @@ (define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VHF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_CVTT_PH_IBS_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32531,7 +32554,7 @@ (define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + [(match_operand:VF1_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_CVT_PS_IBS_ITER))] "TARGET_AVX10_2 && <round_mode512bit_condition>" "vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" @@ -32546,7 +32569,7 @@ (define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF1_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_CVTT_PS_IBS_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32565,7 +32588,7 @@ (define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>" [(set 
(match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v") (unspec:<VEC_GATHER_IDXSI> - [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32576,7 +32599,7 @@ (define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v") (unspec:<VEC_GATHER_IDXDI> - [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32585,8 +32608,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v") - (unspec:VI8_AVX10_2 + [(set (match_operand:VI8 0 "register_operand" "=v") + (unspec:VI8 [(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" @@ -32622,10 +32645,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx10_2_minmaxbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr") + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "bcst_vector_operand" "vmBr") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_MINMAXBF16))] "TARGET_AVX10_2" @@ -32634,10 +32657,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v") - (unspec:VFH_AVX10_2 - [(match_operand:VFH_AVX10_2 1 "register_operand" "v") - (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") + (match_operand:VFH_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_MINMAX))] "TARGET_AVX10_2" @@ -32661,9 +32684,9 @@ (set_attr "mode" "<ssescalarmode>")]) (define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>" - [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v") - (unspec:VI1248_AVX10_2 - [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")] + [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand" "=v") + (unspec:VI1248_AVX512VLBW + [(match_operand:VI1248_AVX512VLBW 1 "memory_operand" "m")] UNSPEC_VMOVRS))] "TARGET_AVX10_2 && TARGET_MOVRS" "vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h index 15d8e96..64f3c20 100644 --- 
a/gcc/config/i386/vaesintrin.h +++ b/gcc/config/i386/vaesintrin.h @@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B) #endif /* __DISABLE_VAES__ */ -#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__) +#if !defined(__VAES__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("vaes,avx512f,evex512") +#pragma GCC target("vaes,avx512f") #define __DISABLE_VAESF__ #endif /* __VAES__ */ diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h index 2b36c37..a02ab38 100644 --- a/gcc/config/i386/vpclmulqdqintrin.h +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -28,9 +28,9 @@ #ifndef _VPCLMULQDQINTRIN_H_INCLUDED #define _VPCLMULQDQINTRIN_H_INCLUDED -#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__) +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("vpclmulqdq,avx512f,evex512") +#pragma GCC target("vpclmulqdq,avx512f") #define __DISABLE_VPCLMULQDQF__ #endif /* __VPCLMULQDQF__ */ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..c8603b9 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ in 128bit, 256bit and 512bit */ 4, 4, 6, /* cost of moving XMM,YMM,ZMM register */ 4, /* cost of moving SSE register to integer. */ + 4, /* cost of moving integer register to SSE. */ COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */ COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -121,16 +122,24 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ + 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ ix86_size_memcpy, ix86_size_memset, @@ -219,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -243,6 +253,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -330,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache @@ -356,6 +374,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -443,6 +468,7 @@ struct processor_costs pentium_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -467,6 +493,13 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -547,6 +580,7 @@ struct processor_costs lakemont_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. 
*/ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -571,6 +605,13 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -666,6 +707,7 @@ struct processor_costs pentiumpro_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -690,6 +732,13 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -776,6 +825,7 @@ struct processor_costs geode_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -800,6 +850,13 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -886,6 +943,7 @@ struct processor_costs k6_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. 
*/ @@ -913,6 +971,13 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1002,6 +1067,7 @@ struct processor_costs athlon_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1027,6 +1093,13 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1120,6 +1193,7 @@ struct processor_costs k8_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1150,6 +1224,13 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1251,6 +1332,7 @@ struct processor_costs amdfam10_cost = { {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1281,6 +1363,13 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1374,6 +1463,7 @@ const struct processor_costs bdver_cost = { {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 16, /* cost of moving SSE register to integer. */ + 16, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ @@ -1405,6 +1495,13 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1518,6 +1615,7 @@ struct processor_costs znver1_cost = { {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1553,6 +1651,14 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1677,6 +1783,7 @@ struct processor_costs znver2_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1712,6 +1819,13 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1812,6 +1926,7 @@ struct processor_costs znver3_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops, throughput 9. Approx 7 uops do not depend on vector size and every load is 4 uops. */ @@ -1847,6 +1962,13 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1949,6 +2071,7 @@ struct processor_costs znver4_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, throughput 5. Approx 7 uops do not depend on vector size and every load is 5 uops. */ @@ -1984,6 +2107,14 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2089,6 +2220,7 @@ struct processor_costs znver5_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* TODO: gather and scatter instructions are currently disabled in x86-tune.def. In some cases they are however a win, see PR116582 @@ -2135,6 +2267,13 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. 
*/ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2250,6 +2389,7 @@ struct processor_costs skylake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2274,6 +2414,13 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2379,6 +2526,7 @@ struct processor_costs icelake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2403,6 +2551,13 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2502,6 +2657,7 @@ struct processor_costs alderlake_cost = { {8, 8, 8, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2526,6 +2682,13 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. 
*/ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2618,6 +2781,7 @@ const struct processor_costs btver1_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2642,6 +2806,13 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2731,6 +2902,7 @@ const struct processor_costs btver2_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2755,6 +2927,13 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2843,6 +3022,7 @@ struct processor_costs pentium4_cost = { {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 16, 16, /* Gather load static, per_elt. */ 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -2867,6 +3047,13 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ pentium4_memcpy, pentium4_memset, @@ -2958,6 +3145,7 @@ struct processor_costs nocona_cost = { {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -2982,6 +3170,13 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3071,6 +3266,7 @@ struct processor_costs atom_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3095,6 +3291,13 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3184,6 +3387,7 @@ struct processor_costs slm_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3208,6 +3412,13 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3309,6 +3520,7 @@ struct processor_costs tremont_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. 
*/ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3335,6 +3547,13 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3349,119 +3568,6 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; -static stringop_algs intel_memcpy[2] = { - {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static stringop_algs intel_memset[2] = { - {libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static const -struct processor_costs intel_cost = { - { - /* Start of register allocator costs. integer->integer move cost is 2. */ - 6, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 6, 6}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - 4, 4, /* SSE->integer and integer->SSE moves */ - 4, 4, /* mask->integer and integer->mask moves */ - {4, 4, 4}, /* cost of loading mask register - in QImode, HImode, SImode. */ - {6, 6, 6}, /* cost if storing mask register - in QImode, HImode, SImode. */ - 2, /* cost of moving mask register. */ - /* End of register allocator costs. 
*/ - }, - - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (3), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers */ - {6, 6, 6, 6, 6}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - 4, /* cost of moving SSE register to integer. */ - 6, 6, /* Gather load static, per_elt. */ - 6, 6, /* Gather store static, per_elt. */ - 32, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - - COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ - COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_INSNS (8), /* cost of MULSS instruction. */ - COSTS_N_INSNS (8), /* cost of MULSD instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ - 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ - intel_memcpy, - intel_memset, - COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ - COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ - "16", /* Loop alignment. */ - "16:8:8", /* Jump alignment. */ - "0:0:8", /* Label alignment. */ - "16", /* Func alignment. */ - 4, /* Small unroll limit. */ - 2, /* Small unroll factor. */ - COSTS_N_INSNS (2), /* Branch mispredict scale. */ -}; - /* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */ static stringop_algs lujiazui_memcpy[2] = { {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, @@ -3532,15 +3638,16 @@ struct processor_costs lujiazui_cost = { {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers. */ + {6, 6, 6}, /* cost of storing integer registers. */ {6, 6, 6, 10, 15}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. 
*/ {6, 6, 6, 10, 15}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. */ {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ - 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ - 6, /* cost of moving SSE register to integer. */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ + 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3566,6 +3673,13 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3658,6 +3772,7 @@ struct processor_costs yongfeng_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3682,6 +3797,13 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3774,6 +3896,7 @@ struct processor_costs shijidadao_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3798,6 +3921,13 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. 
*/ shijidadao_memcpy, shijidadao_memset, @@ -3814,19 +3944,36 @@ struct processor_costs shijidadao_cost = { -/* Generic should produce code tuned for Core-i7 (and newer chips) - and btver1 (and newer chips). */ +/* Generic should produce code tuned for Haswell (and newer chips) + and znver1 (and newer chips): + 1. Don't align memory. + 2. For known sizes, prefer vector loop, unroll loop with 4 moves or + stores per iteration without aligning the loop, up to 256 bytes. + 3. For unknown sizes, use memcpy/memset. + 4. Since each loop iteration has 4 stores and 8 stores for zeroing + with unroll loop may be needed, change CLEAR_RATIO to 10 so that + zeroing up to 72 bytes are fully unrolled with 9 stores without + SSE. + */ static stringop_algs generic_memcpy[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static stringop_algs generic_memset[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static const struct processor_costs generic_cost = { { @@ -3883,7 +4030,7 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ + 10, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ @@ -3896,6 +4043,7 @@ struct processor_costs generic_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3922,6 +4070,13 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4022,6 +4177,7 @@ struct processor_costs core_cost = { {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 2, /* cost of moving SSE register to integer. */ + 2, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, rec. throughput 6. So 5 uops statically and one uops per load. */ @@ -4051,6 +4207,13 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. 
*/ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..ff9c268 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -45,7 +45,6 @@ ix86_issue_rate (void) case PROCESSOR_LAKEMONT: case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: - case PROCESSOR_INTEL: case PROCESSOR_K6: case PROCESSOR_BTVER2: case PROCESSOR_PENTIUM4: @@ -80,7 +79,17 @@ ix86_issue_rate (void) case PROCESSOR_ALDERLAKE: case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: + case PROCESSOR_SIERRAFOREST: + case PROCESSOR_INTEL: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +100,14 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: + case PROCESSOR_GRANITERAPIDS: + case PROCESSOR_GRANITERAPIDS_D: + case PROCESSOR_DIAMONDRAPIDS: + case PROCESSOR_GRANDRIDGE: + case PROCESSOR_CLEARWATERFOREST: + case PROCESSOR_ARROWLAKE: + case PROCESSOR_ARROWLAKE_S: + case PROCESSOR_PANTHERLAKE: return 6; default: @@ -487,6 +497,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_HASWELL: case PROCESSOR_TREMONT: case PROCESSOR_ALDERLAKE: + case PROCESSOR_INTEL: case PROCESSOR_GENERIC: /* Stack engine allows to execute push&pop instructions in parall. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) @@ -509,7 +520,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, break; case PROCESSOR_SILVERMONT: - case PROCESSOR_INTEL: if (!reload_completed) return cost; diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c857e76..91cdca7 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -87,9 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, several insns to break false dependency on the dest register for GLC micro-architecture. 
*/ DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, - "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS - | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID - | m_CORE_ATOM) + "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE) /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies are resolved on SSE register parts instead of whole registers, so we may @@ -574,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV, "sse_movcc_use_blendv", ~m_CORE_ATOM) +/* X86_TUNE_V4SI_REDUCTION_PREFER_SHUFD: Prefer pshuf to reduce V16QI, + V8HI, V8HI, V4SI, V4FI, V2DI modes when lshr are costlier. */ +DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF, + "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5) + /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ |
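
A note on reading the cost tables above: each per-CPU block is a struct processor_costs, and the rows added to every table price the float conversion instructions (CVTSS2SD, the 256/512-bit VCVTPS2PD forms, CVTSI2SS, CVT(T)SS2SI, CVTPI2PS and CVT(T)PS2PI) in the same COSTS_N_INSNS units as the existing ADDSS/DIVSD rows, next to the new "integer register to SSE" move cost. The sketch below only shows how such an entry is meant to be read; the struct and function names are illustrative stand-ins, not the identifiers used by i386.cc, and COSTS_N_INSNS is assumed to have its usual rtl.h meaning of scaling by the cost of one fast instruction.

  /* Minimal sketch; all names are illustrative, not the ones in i386.cc.  */
  #define COSTS_N_INSNS(N) ((N) * 4)

  struct demo_costs
  {
    int addss;      /* cost of ADDSS/SD SUBSS/SD insns.  */
    int cvtss2sd;   /* cost of CVTSS2SD etc.  */
    int cvtsi2ss;   /* cost of CVTSI2SS instruction.  */
    int sse_to_int; /* cost of moving SSE register to integer.  */
    int int_to_sse; /* cost of moving integer register to SSE.  */
  };

  /* Hypothetical per-CPU table in the style of the hunks above.  */
  static const struct demo_costs demo_generic_cost =
  {
    COSTS_N_INSNS (3),  /* ADDSS */
    COSTS_N_INSNS (3),  /* CVTSS2SD */
    COSTS_N_INSNS (6),  /* CVTSI2SS */
    6,                  /* SSE -> integer move */
    6,                  /* integer -> SSE move */
  };

  /* An rtx-cost style helper hands the matching row back to the middle
     end, so a float<->int or float<->double convert can be weighed
     against a plain SSE add when costing a sequence.  */
  static int
  demo_convert_cost (const struct demo_costs *c, int from_integer)
  {
    return from_integer ? c->cvtsi2ss : c->cvtss2sd;
  }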
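
The generic_memcpy/generic_memset hunk switches known-size copies of up to 256 bytes to a vector loop (with an unrolled-loop fallback at the same threshold) and keeps a libcall for everything else, while the CLEAR_RATIO bump from 6 to 10 lets zeroing of up to 72 bytes stay fully unrolled as 9 integer stores. The sketch below walks a table of {max, alg, noalign} triples the way those initializers read; it is a simplified stand-in, not the real stringop_algs type or the decide_alg logic in i386.cc.

  enum demo_alg { demo_vector_loop, demo_unrolled_loop, demo_libcall };

  struct demo_stringop_entry
  {
    long max;            /* largest size handled, -1 means unbounded  */
    enum demo_alg alg;   /* expansion strategy for that size range    */
    int noalign;         /* nonzero: skip the alignment prologue      */
  };

  /* Same shape as the generic_memcpy initializer in the hunk above.  */
  static const struct demo_stringop_entry demo_generic_memcpy[] =
  {
    { 256, demo_vector_loop,   1 },
    { 256, demo_unrolled_loop, 1 },
    {  -1, demo_libcall,       1 },
  };

  /* Pick the first algorithm whose threshold covers the known size.  */
  static enum demo_alg
  demo_decide_alg (long size)
  {
    for (unsigned i = 0; ; i++)
      if (demo_generic_memcpy[i].max == -1
          || size <= demo_generic_memcpy[i].max)
        return demo_generic_memcpy[i].alg;
  }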
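
In the x86-tune-sched.cc hunk, PROCESSOR_INTEL, PROCESSOR_SIERRAFOREST and PROCESSOR_ZNVER5 move into the 4-issue group (the znver5 comment notes that the decoders cap what the scheduler model can usefully exploit), while the Granite Rapids, Diamond Rapids, Grand Ridge, Clearwater Forest, Arrow Lake and Panther Lake entries join the 6-issue group. The sketch below only mirrors the shape of such an issue-rate hook under made-up enum names; it is not the real ix86_issue_rate.

  enum demo_processor { DEMO_GENERIC, DEMO_ZNVER5, DEMO_GRANITERAPIDS };

  static int
  demo_issue_rate (enum demo_processor p)
  {
    switch (p)
      {
      case DEMO_GENERIC:
      case DEMO_ZNVER5:        /* decoders cap useful issue at 4.  */
        return 4;
      case DEMO_GRANITERAPIDS:
        return 6;
      default:
        return 1;
      }
  }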
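
Finally, the x86-tune.def hunk narrows X86_TUNE_DEST_FALSE_DEP_FOR_GLC to Sapphire Rapids and Alder Lake and adds X86_TUNE_SSE_REDUCTION_PREFER_PSHUF for znver4/znver5. DEF_TUNE entries like these are typically consumed by expanding x86-tune.def more than once, first to build an enum of feature indices and then to build a per-feature CPU mask that the selected -mtune CPU is tested against. The X-macro sketch below shows that pattern with illustrative names and masks, not GCC's actual ones.

  /* x86-tune.def style: one list, expanded twice.  Masks and names are
     made up for illustration.  */
  #define DEMO_TUNE_LIST(D)                                   \
    D (DEMO_TUNE_SSE_REDUCTION_PREFER_PSHUF, 0x03)            \
    D (DEMO_TUNE_DEST_FALSE_DEP_FOR_GLC,     0x0c)

  enum demo_tune
  {
  #define D(NAME, MASK) NAME,
    DEMO_TUNE_LIST (D)
  #undef D
    DEMO_TUNE_LAST
  };

  static const unsigned demo_tune_mask[DEMO_TUNE_LAST] =
  {
  #define D(NAME, MASK) MASK,
    DEMO_TUNE_LIST (D)
  #undef D
  };

  /* True when the selected CPU's bit is in the feature's mask.  */
  static int
  demo_tune_enabled (enum demo_tune t, unsigned cpu_bit)
  {
    return (demo_tune_mask[t] & cpu_bit) != 0;
  }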