Diffstat (limited to 'gcc/config/i386')
63 files changed, 8858 insertions, 6664 deletions
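Context for the hunks below: the deleted headers declared the AVX10.2 512-bit intrinsics (bf16 arithmetic and FMA, FP8/FP16 conversions, dot-product, minmax, saturating-convert). As a point of reference only, here is a minimal usage sketch of two of the bf16 intrinsics declared in the removed avx10_2-512bf16intrin.h; it assumes a toolchain that still installs these headers and enables the corresponding target (the header uses #pragma GCC target("avx10.2"), so something like -mavx10.2 on the command line), and the function name is purely illustrative, not part of this change.

/* Illustrative sketch, not part of the diff.  Uses the prototypes shown
   in the deleted header: _mm512_maskz_mul_pbh and _mm512_add_pbh operate
   on 32 bf16 lanes held in a __m512bh vector.  */
#include <immintrin.h>

__m512bh
masked_mul_then_add (__m512bh a, __m512bh b, __m512bh c, __mmask32 m)
{
  /* t = a * b in the lanes selected by m, zero in the other lanes.  */
  __m512bh t = _mm512_maskz_mul_pbh (m, a, b);
  /* Unmasked element-wise bf16 add of the partial product and c.  */
  return _mm512_add_pbh (t, c);
}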
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h deleted file mode 100644 index 21e4b36..0000000 --- a/gcc/config/i386/avx10_2-512bf16intrin.h +++ /dev/null @@ -1,681 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED -#define _AVX10_2_512BF16INTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_add_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_addbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sub_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_subbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mul_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); -} - -extern 
__inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_mulbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_div_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_divbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_max_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_maxbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_min_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_minbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_scalef_pbh (__m512bh __A, __m512bh __B) -{ - return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) -{ - return (__m512bh) - __builtin_ia32_scalefbf16512_mask (__A, __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern 
__inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask3_fnmsub_pbh 
(__m512bh __A, __m512bh __B, - __m512bh __C, __mmask32 __U) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, __m512bh __C) -{ - return (__m512bh) - __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rsqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); - -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rsqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_sqrt_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_sqrtbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_rcp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_rcpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getexp_pbh (__m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) -{ - return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) -{ - return (__m512bh) - __builtin_ia32_getexpbf16512_mask (__A, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -/* Intrinsics vrndscalebf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_roundscale_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_rndscalebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_roundscale_pbh(A, B) \ - (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_roundscale_pbh(A, B, C, D) \ - (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_roundscale_pbh(A, B, C) \ - (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vreducebf16. */ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_reduce_pbh (__m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) -{ - return (__m512bh) - __builtin_ia32_reducebf16512_mask (__A, B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_reduce_pbh(A, B) \ - (__builtin_ia32_reducebf16512_mask ((A), (B), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_reduce_pbh(A, B, C, D) \ - (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) - -#define _mm512_maskz_reduce_pbh(A, B, C) \ - (__builtin_ia32_reducebf16512_mask ((B), (C), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vgetmantbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - __W, __U); -} - -extern __inline__ __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, - _MM_MANTISSA_NORM_ENUM __B, - _MM_MANTISSA_SIGN_ENUM __C) -{ - return (__m512bh) - __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, - (__v32bf) _mm512_setzero_si512 (), - __U); -} - -#else -#define _mm512_getmant_pbh(A, B, C) \ - (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (__mmask32) -1)) - -#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ - (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) - -#define _mm512_maskz_getmant_pbh(A, B, C, D) \ - (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ - (__v32bf) _mm512_setzero_si512 (), \ - (A))) - -#endif /* __OPTIMIZE__ */ - -/* Intrinsics vfpclassbf16. */ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) -{ - return (__mmask32) - __builtin_ia32_fpclassbf16512_mask (__A, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) - -#define _mm512_fpclass_pbh_mask(X, C) \ - ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ - (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) -#endif /* __OPIMTIZE__ */ - - -/* Intrinsics vcmpbf16. 
*/ -#ifdef __OPTIMIZE__ -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, - const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); -} - -extern __inline __mmask32 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) -{ - return (__mmask32) - __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, - (__mmask32) -1); -} - -#else -#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) - -#define _mm512_cmp_pbh_mask(A, B, C) \ - ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) - -#endif /* __OPIMTIZE__ */ - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h deleted file mode 100644 index 611a40d..0000000 --- a/gcc/config/i386/avx10_2-512convertintrin.h +++ /dev/null @@ -1,572 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead." 
-#endif // _IMMINTRIN_H_INCLUDED - -#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED -#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED - -#ifndef __AVX10_2__ -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) -1, - __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, - __m512 __B, const int __R) -{ - return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, - (__v16sf) __B, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - __R); -} - -#else -#define _mm512_cvtx_round2ps_ph(A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (-1), \ - (R))) -#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) (W), \ - (__mmask32) (U), \ - (R))) -#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ - ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (__v32hf) \ - (_mm512_setzero_ph ()), \ - (__mmask32) (U), \ - (R))) -#endif /* __OPTIMIZE__ */ - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - 
(__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, - __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, - __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) -{ - return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, - (__v32hf) __B, - (__v32qi)(__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - 
_mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) -1); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, - __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) __W, - (__mmask64) __U); -} - -extern __inline__ __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) -{ - return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, - (__v32hf) __B, - (__v64qi) - _mm512_setzero_si512 (), - (__mmask64) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_cvthf8_ph (__m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_undefined_ph (), - (__mmask32) -1); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) __W, - (__mmask32) __U); -} - -extern __inline__ __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, - (__v32hf) (__m512h) - _mm512_setzero_ph (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_bf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi)(__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvts_ph_hf8 (__m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - 
_mm256_undefined_si256 (), - (__mmask32) -1); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) __W, - (__mmask32) __U); -} - -extern __inline__ __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) -{ - return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, - (__v32qi) (__m256i) - _mm256_setzero_si256 (), - (__mmask32) __U); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtbf8_ph (__m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( - (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) -{ - return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( - (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); -} - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h deleted file mode 100644 index 43271e7..0000000 --- a/gcc/config/i386/avx10_2-512mediaintrin.h +++ /dev/null @@ -1,514 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead." 
-#endif - -#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED -#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED - -#if !defined(__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, - __m512i __A, __m512i __B) -{ - return (__m512i) - __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) -1); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_mask ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -extern __inline __m512 -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, - __m512h __B) -{ - return (__m512) - __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, - (__v16sf) __A, - (__v16sf) __B, - (__mmask16) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, - (__v64qi) __Y, - __M); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) __W, - __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, - __m512i __Y, const int __M) -{ - return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, - (__v64qi) __Y, - __M, - (__v32hi) _mm512_setzero_epi32 (), - __U); -} -#else -#define _mm512_mpsadbw_epu8(X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), (int)(M)) - -#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)) - -#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ - (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ - (__v64qi)(__m512i)(Y), \ - (int)(M), \ - (__v32hi) _mm512_setzero_epi32 (), \ - (__mmask32)(U)) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h deleted file mode 100644 index a743346..0000000 --- a/gcc/config/i386/avx10_2-512minmaxintrin.h +++ /dev/null @@ -1,489 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - This file is part of GCC. - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED -#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -#ifdef __OPTIMIZE__ -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) -1); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, - __m512bh __A, __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf) __W, - (__mmask32) __U); -} - -extern __inline __m512bh -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, - __m512bh __B, const int __C) -{ - return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, - (__v32bf) __B, - __C, - (__v32bf)(__m512bh) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, - const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_undefined_pd (), - (__mmask8) -1, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, - __m512d __B, const int __C, const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) 
__W, - (__mmask8) __U, __R); -} - -extern __inline __m512d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, - const int __C, const int __R) -{ - return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, - (__v8df) __B, - __C, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_undefined_ph (), - (__mmask32) -1, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, - __m512h __B, const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) __W, - (__mmask32) __U, __R); -} - -extern __inline __m512h -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, - const int __C, const int __R) -{ - return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, - (__v32hf) __B, - __C, - (__v32hf) - _mm512_setzero_ph (), - (__mmask32) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, - 
_MM_FROUND_CUR_DIRECTION); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_undefined_ps (), - (__mmask16) -1, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, - __m512 __B, const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) __W, - (__mmask16) __U, __R); -} - -extern __inline __m512 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, - const int __C, const int __R) -{ - return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, - (__v16sf) __B, - __C, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U, __R); -} - -#else -#define _mm512_minmax_pbh(A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (-1))) - -#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) (W), \ - (__mmask32) (U))) - -#define _mm512_maskz_minmax_pbh(U, A, B, C) \ - ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ - (__v32bf) (B), \ - (int) (C), \ - (__v32bf) (__m512bh) \ - _mm512_setzero_si512 (), \ - (__mmask32) (U))) - -#define _mm512_minmax_round_pd(A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ph(A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - (int) (R))) - -#define _mm512_minmax_round_ps(A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - (int) (R))) - -#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) 
(W), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - (int) (R))) - -#define _mm512_minmax_pd(A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_undefined_pd (), \ - (__mmask8) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_pd(W, U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) (W), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_pd(U, A, B, C) \ - ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ - (__v8df) (B), \ - (int) (C), \ - (__v8df) (__m512d) \ - _mm512_setzero_pd (), \ - (__mmask8) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ph(A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_undefined_ph (), \ - (__mmask32) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ph(W, U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) (W), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ph(U, A, B, C) \ - ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ - (__v32hf) (B), \ - (int) (C), \ - (__v32hf) (__m512h) \ - _mm512_setzero_ph (), \ - (__mmask32) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_minmax_ps(A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_undefined_ps (), \ - (__mmask16) (-1), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_minmax_ps(W, U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) (W), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_maskz_minmax_ps(U, A, B, C) \ - ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ - (__v16sf) (B), \ - (int) (C), \ - (__v16sf) (__m512) \ - _mm512_setzero_ps (), \ - (__mmask16) (U), \ - _MM_FROUND_CUR_DIRECTION)) - -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h deleted file mode 100644 index 215b7fd..0000000 --- a/gcc/config/i386/avx10_2-512satcvtintrin.h +++ /dev/null @@ -1,1575 +0,0 @@ -/* Copyright (C) 2024-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead." -#endif - -#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED -#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED - -#if !defined (__AVX10_2__) -#pragma GCC push_options -#pragma GCC target("avx10.2") -#define __DISABLE_AVX10_2__ -#endif /* __AVX10_2__ */ - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_bf16_epu8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epi8 (__m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) -{ - return - (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_bf16_epu8 (__m512bh __A) -{ - return (__m512i) - 
__builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) -{ - return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) -{ - return (__m512i) - __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epi8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ph_epu8 (__m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epi8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_ps_epu8 (__m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epi8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ph_epu8 (__m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epi8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_ps_epu8 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) 
__U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epi64 (__m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu32 (__m512d __A) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) -{ - return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_pd_epu64 (__m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, 
- (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epi64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu32 (__m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_ps_epu64 (__m256 __A) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); -} - -#ifdef __OPTIMIZE__ -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epi8 (__m512h __A, const int 
__R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - 
_mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_undefined_si512 (), - (__mmask32) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) __W, - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, - (__v32hi) - _mm512_setzero_si512 (), - (__mmask32) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) -{ - return (__m256i) - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_undefined_si256 (), - (__mmask8) -1, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, - const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) -{ - return - (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, - const int 
__R) -{ - return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_undefined_si512 (), - (__mmask16) -1, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) __W, - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) -{ - return (__m512i) - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_undefined_si512 (), - (__mmask8) -1, - __R); -} - 
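The _round/SAE variants above take an explicit rounding-control immediate instead of using the MXCSR rounding mode. A minimal usage sketch, assuming a translation unit compiled with AVX10.2 enabled (e.g. -mavx10.2) and <immintrin.h> included; the wrapper function names and the choice of rounding flags are illustrative only, not part of the header being shown:

#include <immintrin.h>

/* Saturating convert of 32 FP16 lanes to signed 8-bit integer values
   (one result per 16-bit element), selecting round-to-nearest-even
   explicitly and suppressing FP exceptions.  */
__m512i
cvt_ph_to_ibs (__m512h ph)
{
  return _mm512_ipcvts_roundph_epi8 (ph, _MM_FROUND_TO_NEAREST_INT
					 | _MM_FROUND_NO_EXC);
}

/* Truncating, saturating convert of 8 doubles to 32-bit signed
   integers; lanes cleared in KEEP are zeroed, and exceptions are
   suppressed via the SAE flag.  */
__m256i
cvtt_pd_to_epi32 (__mmask8 keep, __m512d pd)
{
  return _mm512_maskz_cvtts_roundpd_epi32 (keep, pd, _MM_FROUND_NO_EXC);
}

Because the rounding argument must be a compile-time constant, the macro forms below the #else branch are what these names expand to when __OPTIMIZE__ is not defined.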
-extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, - const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) __W, - (__mmask8) __U, - __R); -} - -extern __inline __m512i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) -{ - return - (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U, - __R); -} -#else -#define _mm512_ipcvts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundph_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define 
_mm512_ipcvtts_roundph_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_undefined_si512 ()), \ - (__mmask32) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) (W), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ - (__v32hi) \ - (_mm512_setzero_si512 ()), \ - (__mmask32) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epi8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_ipcvtts_roundps_epu8(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu32(A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_undefined_si256 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ - ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ - ((__m256i) \ - __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ - (__v8si) \ - (_mm256_setzero_si256 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundpd_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round 
((__v8df) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epi64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu32(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_undefined_si512 ()), \ - (__mmask16) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) (W), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ - (__v16si) \ - (_mm512_setzero_si512 ()), \ - (__mmask16) (U), \ - (R))) - -#define _mm512_cvtts_roundps_epu64(A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_undefined_si512 ()), \ - (__mmask8) (-1), \ - (R))) - -#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ - ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) (W), \ - (__mmask8) (U), \ - (R))) - -#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ - ((__m512i) \ - __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ - (__v8di) \ - (_mm512_setzero_si512 ()), \ - (__mmask8) (U), \ - (R))) -#endif - -#ifdef __DISABLE_AVX10_2__ -#undef __DISABLE_AVX10_2__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX10_2__ */ - -#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index e6890fc..9560480 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -34,6 +34,32 @@ #define __DISABLE_AVX10_2__ #endif /* __AVX10_2__ */ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_add_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_addbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_add_pbh (__m512bh 
__W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_addbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_add_pbh (__m256bh __A, __m256bh __B) @@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sub_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_subbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_subbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sub_pbh (__m256bh __A, __m256bh __B) @@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mul_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_mulbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mul_pbh (__m256bh __A, __m256bh __B) @@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_div_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_divbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_divbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_div_pbh (__m256bh __A, __m256bh __B) @@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, 
__m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_max_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_maxbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_max_pbh (__m256bh __A, __m256bh __B) @@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_min_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_minbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_minbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_min_pbh (__m256bh __A, __m256bh __B) @@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_scalef_pbh (__m512bh __A, __m512bh __B) +{ + return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B) +{ + return (__m512bh) + __builtin_ia32_scalefbf16512_mask (__A, __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_scalef_pbh (__m256bh __A, __m256bh __B) @@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B, + __m512bh __C, __mmask32 __U) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, __m512bh __C) +{ + return (__m512bh) + __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C) @@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A, __builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rsqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); + +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rsqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rsqrt_pbh (__m256bh __A) @@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sqrt_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_sqrtbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_sqrt_pbh (__m256bh __A) @@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_rcp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_rcp_pbh 
(__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_rcpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_rcp_pbh (__m256bh __A) @@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A) __U); } +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getexp_pbh (__m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A) +{ + return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A) +{ + return (__m512bh) + __builtin_ia32_getexpbf16512_mask (__A, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getexp_pbh (__m256bh __A) @@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A) /* Intrinsics vrndscalebf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_roundscale_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_rndscalebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_roundscale_pbh (__m256bh __A, int B) @@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) } #else +#define _mm512_roundscale_pbh(A, B) \ + (__builtin_ia32_rndscalebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_roundscale_pbh(A, B, C, D) \ + (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B))) + +#define _mm512_maskz_roundscale_pbh(A, B, C) \ + (__builtin_ia32_rndscalebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_roundscale_pbh(A, B) \ (__builtin_ia32_rndscalebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vreducebf16. 
*/ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_pbh (__m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B) +{ + return (__m512bh) + __builtin_ia32_reducebf16512_mask (__A, B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_reduce_pbh (__m256bh __A, int B) @@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) } #else +#define _mm512_reduce_pbh(A, B) \ + (__builtin_ia32_reducebf16512_mask ((A), (B), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_reduce_pbh(A, B, C, D) \ + (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B))) + +#define _mm512_maskz_reduce_pbh(A, B, C) \ + (__builtin_ia32_reducebf16512_mask ((B), (C), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_reduce_pbh(A, B) \ (__builtin_ia32_reducebf16256_mask ((A), (B), \ (__v16bf) _mm256_setzero_si256 (), \ @@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B) /* Intrinsics vgetmantbf16. */ #ifdef __OPTIMIZE__ +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + __W, __U); +} + +extern __inline__ __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A, + _MM_MANTISSA_NORM_ENUM __B, + _MM_MANTISSA_SIGN_ENUM __C) +{ + return (__m512bh) + __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B, + (__v32bf) _mm512_setzero_si512 (), + __U); +} + extern __inline__ __m256bh __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B, @@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, } #else +#define _mm512_getmant_pbh(A, B, C) \ + (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (__mmask32) -1)) + +#define _mm512_mask_getmant_pbh(A, B, C, D, E) \ + (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B))) + +#define _mm512_maskz_getmant_pbh(A, B, C, D) \ + (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \ + (__v32bf) _mm512_setzero_si512 (), \ + (A))) + #define _mm256_getmant_pbh(A, B, C) \ (__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \ (__v16bf) 
_mm256_setzero_si256 (), \ @@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A, /* Intrinsics vfpclassbf16. */ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm) +{ + return (__mmask32) + __builtin_ia32_fpclassbf16512_mask (__A, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) } #else +#define _mm512_mask_fpclass_pbh_mask(U, X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U))) + +#define _mm512_fpclass_pbh_mask(X, C) \ + ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \ + (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1))) + #define _mm256_mask_fpclass_pbh_mask(U, A, B) \ ((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U))) @@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm) /* Intrinsics vcmpbf16. */ #ifdef __OPTIMIZE__ +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B, + const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm) +{ + return (__mmask32) + __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, + (__mmask32) -1); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A, @@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm) } #else +#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A))) + +#define _mm512_cmp_pbh_mask(A, B, C) \ + ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1))) + #define _mm256_mask_cmp_pbh_mask(A, B, C, D) \ ((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A))) diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h index 8cbdc66..f2fb98f 100644 --- a/gcc/config/i386/avx10_2convertintrin.h +++ b/gcc/config/i386/avx10_2convertintrin.h @@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx2ps_ph (__m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + 
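The _mm512_cvtx2ps_ph and _mm512_mask_cvtx2ps_ph definitions above (the zero-masked form follows) pack two 16 x float vectors into one 32 x _Float16 vector. A minimal sketch of the merge-masked form, illustrative only and not part of the patch; it assumes <immintrin.h>, an AVX10.2-enabled compiler, and placeholder function and parameter names:

#include <immintrin.h>

/* Illustrative only: convert a and b to _Float16 and pack them into one
   512-bit result; lanes whose bit in keep is clear retain fallback.  */
static __m512h
pack_two_ps_to_ph (__m512h fallback, __mmask32 keep, __m512 a, __m512 b)
{
  return _mm512_mask_cvtx2ps_ph (fallback, keep, a, b);
}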
+extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) -1, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A, + __m512 __B, const int __R) +{ + return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A, + (__v16sf) __B, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + __R); +} + +#else +#define _mm512_cvtx_round2ps_ph(A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (-1), \ + (R))) +#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) (W), \ + (__mmask32) (U), \ + (R))) +#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ + ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (__v32hf) \ + (_mm512_setzero_ph ()), \ + (__mmask32) (U), \ + (R))) +#endif /* __OPTIMIZE__ */ + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_bf8 (__m128i __A, __m128h __B) @@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_bf8 (__m128i __A, __m128h __B) @@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbiasph_hf8 (__m128i __A, __m128h __B) @@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A, + __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_biasph_hf8 (__m128i __A, __m128h __B) @@ -350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U, + __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B) +{ + return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A, + (__v32hf) __B, + (__v32qi)(__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_bf8 (__m128h __A, __m128h __B) @@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 
(__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_bf8 (__m128h __A, __m128h __B) @@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt2ph_hf8 (__m128h __A, __m128h __B) @@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) (__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_2ph_hf8 (__m128h __A, __m128h __B) @@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B) 
(__mmask32) __U); } +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U, + __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) __W, + (__mmask64) __U); +} + +extern __inline__ __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B) +{ + return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A, + (__v32hf) __B, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} + extern __inline__ __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvthf8_ph (__m128i __A) @@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A) (__mmask16) __U); } +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvthf8_ph (__m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_undefined_ph (), + (__mmask32) -1); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) __W, + (__mmask32) __U); +} + +extern __inline__ __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A, + (__v32hf) (__m512h) + _mm512_setzero_ph (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_bf8 (__m128h __A) @@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_ph_bf8 (__m128h __A) @@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_bf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + 
_mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtph_hf8 (__m128h __A) @@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi)(__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline__ __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvts_ph_hf8 (__m128h __A) @@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A) (__mmask16) __U); } +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvts_ph_hf8 (__m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) __W, + (__mmask32) __U); +} + +extern __inline__ __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A) +{ + return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A, + (__v32qi) (__m256i) + _mm256_setzero_si256 (), + (__mmask32) __U); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtbf8_ph (__m128i __A) @@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A) (__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8)); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtbf8_ph (__m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( + (__m512i) __S, __U, 
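/* A round-trip sketch for the FP16/BF8 conversions defined above
   (illustrative only, not part of the patch; assumes an AVX10.2 target and a
   caller-supplied __m512h vector h).  Note that _mm512_cvtbf8_ph is
   implemented above as widen-plus-shift, because a bf8 value is simply the
   high byte of the corresponding _Float16 bit pattern:

     __m256i bf8  = _mm512_cvtph_bf8 (h);    // narrow 32 x _Float16 to 32 x bf8
     __m512h back = _mm512_cvtbf8_ph (bf8);  // widen the bf8 values back to _Float16

   The _mm512_cvts_ph_bf8 variant has the same shape but saturates
   out-of-range values.
*/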
(__m512i) _mm512_cvtepi8_epi16 (__A), 8)); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A) +{ + return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( + (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); +} + #ifdef __DISABLE_AVX10_2__ #undef __DISABLE_AVX10_2__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h index 0993e8e..7d30502 100644 --- a/gcc/config/i386/avx10_2mediaintrin.h +++ b/gcc/config/i386/avx10_2mediaintrin.h @@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + 
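/* A usage sketch for the 512-bit 8-bit integer dot-product intrinsics
   defined above (illustrative only, not part of the patch; assumes an
   AVX10.2 target and caller-supplied acc, a, b and __mmask16 m).  Each
   32-bit lane accumulates four byte products; the ss/su/uu infix gives the
   signedness of the two byte operands and a trailing s saturates:

     acc = _mm512_dpbssd_epi32 (acc, a, b);          // signed a times signed b
     acc = _mm512_mask_dpbssd_epi32 (acc, m, a, b);  // keep acc where m is clear
*/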
__builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U, @@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W, (__mmask8) __U); } +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + 
(__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + 
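/* The 16-bit dot products defined above follow the same pattern, with two
   word products accumulated per 32-bit lane (illustrative only, not part of
   the patch; assumes an AVX10.2 target):

     acc = _mm512_dpwsud_epi32 (acc, a, b);           // signed words times unsigned words
     acc = _mm512_maskz_dpwsud_epi32 (m, acc, a, b);  // zero lanes where m is clear
*/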
(__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W, + __m512i __A, __m512i __B) +{ + return (__m512i) + __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W, + (__v16si) __A, + (__v16si) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) -1); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A, + __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_mask ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + +extern __inline __m512 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A, + __m512h __B) +{ + return (__m512) + __builtin_ia32_vdpphps512_maskz ((__v16sf) __W, + (__v16sf) __A, + (__v16sf) __B, + (__mmask16) __U); +} + extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B) @@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), __U); } + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X, + (__v64qi) __Y, + __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) __W, + __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X, + __m512i __Y, const int __M) +{ + return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X, + (__v64qi) __Y, + __M, + (__v32hi) _mm512_setzero_epi32 (), + __U); +} #else #define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \ (__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \ @@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X, (__v16hi) _mm256_setzero_si256 (), \ (__mmask16)(U)) +#define _mm512_mpsadbw_epu8(X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), (int)(M)) + +#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi)(__m512i)(W), \ + (__mmask32)(U)) + +#define 
_mm512_maskz_mpsadbw_epu8(U, X, Y, M) \ + (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \ + (__v64qi)(__m512i)(Y), \ + (int)(M), \ + (__v32hi) _mm512_setzero_epi32 (), \ + (__mmask32)(U)) #endif #ifdef __DISABLE_AVX10_2__ diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h index 0a4a253..f9fe14e 100644 --- a/gcc/config/i386/avx10_2minmaxintrin.h +++ b/gcc/config/i386/avx10_2minmaxintrin.h @@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A, (__mmask16) __U); } +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) -1); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U, + __m512bh __A, __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf) __W, + (__mmask32) __U); +} + +extern __inline __m512bh +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A, + __m512bh __B, const int __C) +{ + return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A, + (__v32bf) __B, + __C, + (__v32bf)(__m512bh) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_pd (__m128d __A, __m128d __B, const int __C) @@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C) (__mmask8) __U); } +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C, + const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + __m512d __B, const int __C, const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) __W, + 
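/* A usage sketch for the 512-bit minmax intrinsics defined above
   (illustrative only, not part of the patch).  The immediate selects one of
   the IEEE 754-2019 minimum/maximum operations; the 0 below is only a
   placeholder selector, and an AVX10.2-enabled compiler is assumed:

     __m512d r  = _mm512_minmax_pd (x, y, 0);
     __m512d rs = _mm512_minmax_round_pd (x, y, 0, _MM_FROUND_NO_EXC);
*/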
(__mmask8) __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B, + const int __C, const int __R) +{ + return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A, + (__v8df) __B, + __C, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128h __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ph (__m128h __A, __m128h __B, const int __C) @@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C) (__mmask16) __U); } +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_undefined_ph (), + (__mmask32) -1, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A, + __m512h __B, const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) __W, + (__mmask32) __U, __R); +} + +extern __inline __m512h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B, + const int __C, const int __R) +{ + return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A, + (__v32hf) __B, + __C, + (__v32hf) + _mm512_setzero_ph (), + (__mmask32) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_ps (__m128 __A, __m128 __B, const int __C) @@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C) (__mmask8) __U); } +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 
__B, const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A, + __m512 __B, const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) __W, + (__mmask16) __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B, + const int __C, const int __R) +{ + return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A, + (__v16sf) __B, + __C, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_minmax_sd (__m128d __A, __m128d __B, const int __C) @@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_si256 (), \ (__mmask16) (U))) +#define _mm512_minmax_pbh(A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (-1))) + +#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) (W), \ + (__mmask32) (U))) + +#define _mm512_maskz_minmax_pbh(U, A, B, C) \ + ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \ + (__v32bf) (B), \ + (int) (C), \ + (__v32bf) (__m512bh) \ + _mm512_setzero_si512 (), \ + (__mmask32) (U))) + #define _mm_minmax_pd(A, B, C) \ ((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \ (__v2df) (B), \ @@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_pd (), \ (__mmask8) (U))) +#define _mm512_minmax_pd(A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_pd(W, U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_pd(U, A, B, C) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_pd(A, B, C, R) \ + ((__m512d) 
__builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_undefined_pd (), \ + (__mmask8) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) (W), \ + (__mmask8) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ + ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \ + (__v8df) (B), \ + (int) (C), \ + (__v8df) (__m512d) \ + _mm512_setzero_pd (), \ + (__mmask8) (U), \ + (int) (R))) + #define _mm_minmax_ph(A, B, C) \ ((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \ (__v8hf) (B), \ @@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ph (), \ (__mmask16) (U))) +#define _mm512_minmax_ph(A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ph(W, U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ph(U, A, B, C) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ph(A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_undefined_ph (), \ + (__mmask32) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) (W), \ + (__mmask32) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ + ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \ + (__v32hf) (B), \ + (int) (C), \ + (__v32hf) (__m512h) \ + _mm512_setzero_ph (), \ + (__mmask32) (U), \ + (int) (R))) + #define _mm_minmax_ps(A, B, C) \ ((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \ (__v4sf) (B), \ @@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B, _mm256_setzero_ps (), \ (__mmask8) (U))) +#define _mm512_minmax_ps(A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ps(W, U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ps(U, A, B, C) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ps(A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_undefined_ps (), \ + (__mmask16) (-1), \ + (int) (R))) + +#define _mm512_mask_minmax_round_ps(W, U, 
A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) (W), \ + (__mmask16) (U), \ + (int) (R))) + +#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ + ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \ + (__v16sf) (B), \ + (int) (C), \ + (__v16sf) (__m512) \ + _mm512_setzero_ps (), \ + (__mmask16) (U), \ + (int) (R))) + #define _mm_minmax_round_sd(A, B, C, R) \ ((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \ (__v2df) (B), \ diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h index 78bcd72..c4fa19b 100644 --- a/gcc/config/i386/avx10_2satcvtintrin.h +++ b/gcc/config/i386/avx10_2satcvtintrin.h @@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A) (__mmask8) __U); } -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_ipcvts_bf16_epi8 (__m256bh __A) -{ - return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_undefined_si256 (), - (__mmask16) -1); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) -{ - return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) -{ - return - (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, - (__v16hi) - _mm256_setzero_si256 (), - (__mmask16) __U); -} - extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ipcvts_bf16_epu8 (__m128bh __A) @@ -127,6 +96,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_ipcvts_bf16_epi8 (__m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A) +{ + return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A) +{ + return + (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ipcvts_bf16_epu8 (__m256bh __A) { return @@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A) (__mmask16) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epi8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i 
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ph_epu8 (__m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_undefined_si128 (), - (__mmask8) -1); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) -{ - return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, - (__v8hi) - _mm_setzero_si128 (), - (__mmask8) __U); -} - -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epi8 (__m128 __A) +_mm512_ipcvts_bf16_epi8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) +_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ipcvts_ps_epu8 (__m128 __A) +_mm512_ipcvts_bf16_epu8 (__m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_undefined_si128 (), - (__mmask8) -1); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); } -extern __inline __m128i +extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 
__A) +_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A) { - return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, - (__v4si) - _mm_setzero_si128 (), - (__mmask8) __U); + return + (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); } extern __inline __m128i @@ -390,6 +336,183 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A) (__mmask16) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_bf16_epi8 (__m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A) +{ + return + (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_bf16_epu8 (__m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A) +{ + return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A) +{ + return (__m512i) + __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ph_epi8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ph_epu8 (__m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A) +{ + return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ps_epi8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ipcvts_ps_epu8 (__m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask8) __U); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ipcvtts_ph_epi8 (__m128h __A) @@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A) (__mmask8) __U); } +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epi8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ph_epu8 (__m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epi8 (__m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_ps_epu8 (__m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epi8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ph_epu8 (__m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + 
+extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epi8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_ps_epu8 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epi64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu32 (__m512d __A) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + 
_mm256_undefined_si256 (), + (__mmask8) -1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_pd_epu64 (__m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epi64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epu32 (__m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) +{ + return 
(__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_ps_epu64 (__m256 __A) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epi8 (__m512i 
__W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_undefined_si512 (), + (__mmask32) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) __W, + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + 
(__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R) +{ + return (__m256i) + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) +{ + return + (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + 
(__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_undefined_si512 (), + (__mmask16) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) __W, + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R) +{ + return (__m512i) + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_undefined_si512 (), + (__mmask8) -1, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, + const int __R) +{ + return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) __W, + (__mmask8) __U, + __R); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R) +{ + return + (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U, + __R); +} +#else +#define _mm512_ipcvts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define 
_mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundph_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_undefined_si512 ()), \ + (__mmask32) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) (W), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \ + (__v32hi) \ + (_mm512_setzero_si512 ()), \ + (__mmask32) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epi8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_ipcvtts_roundps_epu8(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \ + ((__m256i) \ + 
__builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu32(A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_undefined_si256 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \ + ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \ + ((__m256i) \ + __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \ + (__v8si) \ + (_mm256_setzero_si256 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundpd_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epi64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_cvtts_roundps_epu32(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_undefined_si512 ()), \ + (__mmask16) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) (W), \ + (__mmask16) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \ + (__v16si) \ + (_mm512_setzero_si512 ()), \ + (__mmask16) 
(U), \ + (R))) + +#define _mm512_cvtts_roundps_epu64(A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_undefined_si512 ()), \ + (__mmask8) (-1), \ + (R))) + +#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \ + ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) (W), \ + (__mmask8) (U), \ + (R))) + +#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \ + ((__m512i) \ + __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \ + (__v8di) \ + (_mm512_setzero_si512 ()), \ + (__mmask8) (U), \ + (R))) +#endif + extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtts_sd_epi32 (__m128d __A) diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h index 6740109..6c087e6 100644 --- a/gcc/config/i386/avx512bf16intrin.h +++ b/gcc/config/i386/avx512bf16intrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BF16INTRIN_H_INCLUDED #define _AVX512BF16INTRIN_H_INCLUDED -#if !defined (__AVX512BF16__) || defined (__EVEX512__) +#if !defined (__AVX512BF16__) #pragma GCC push_options -#pragma GCC target("avx512bf16,no-evex512") +#pragma GCC target("avx512bf16") #define __DISABLE_AVX512BF16__ #endif /* __AVX512BF16__ */ @@ -42,17 +42,6 @@ _mm_cvtsbh_ss (__bf16 __A) return __builtin_ia32_cvtbf2sf (__A); } -#ifdef __DISABLE_AVX512BF16__ -#undef __DISABLE_AVX512BF16__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BF16__ */ - -#if !defined (__AVX512BF16__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512bf16,evex512") -#define __DISABLE_AVX512BF16_512__ -#endif /* __AVX512BF16_512__ */ - /* Internal data types for implementing the intrinsics. */ typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); @@ -155,8 +144,8 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) (__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16))); } -#ifdef __DISABLE_AVX512BF16_512__ -#undef __DISABLE_AVX512BF16_512__ +#ifdef __DISABLE_AVX512BF16__ +#undef __DISABLE_AVX512BF16__ #pragma GCC pop_options #endif /* __DISABLE_AVX512BF16_512__ */ diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h index ffaceac..fd6d183 100644 --- a/gcc/config/i386/avx512bf16vlintrin.h +++ b/gcc/config/i386/avx512bf16vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BF16VLINTRIN_H_INCLUDED #define _AVX512BF16VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) #pragma GCC push_options -#pragma GCC target("avx512bf16,avx512vl,no-evex512") +#pragma GCC target("avx512bf16,avx512vl") #define __DISABLE_AVX512BF16VL__ #endif /* __AVX512BF16__ */ diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h index 301f125..d7156f9 100644 --- a/gcc/config/i386/avx512bitalgintrin.h +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BITALGINTRIN_H_INCLUDED #define _AVX512BITALGINTRIN_H_INCLUDED -#if !defined (__AVX512BITALG__) || !defined (__EVEX512__) +#if !defined (__AVX512BITALG__) #pragma GCC push_options -#pragma GCC target("avx512bitalg,evex512") +#pragma GCC target("avx512bitalg") #define __DISABLE_AVX512BITALG__ #endif /* __AVX512BITALG__ */ diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h index e4883cf..cf9cff6 100644 --- a/gcc/config/i386/avx512bitalgvlintrin.h +++ b/gcc/config/i386/avx512bitalgvlintrin.h @@ -28,9 +28,9 @@ #ifndef 
_AVX512BITALGVLINTRIN_H_INCLUDED #define _AVX512BITALGVLINTRIN_H_INCLUDED -#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || defined (__EVEX512__) +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512vl,no-evex512") +#pragma GCC target("avx512bitalg,avx512vl") #define __DISABLE_AVX512BITALGVL__ #endif /* __AVX512BITALGVL__ */ diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index 47c4c03..5e9eeaa 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512BWINTRIN_H_INCLUDED #define _AVX512BWINTRIN_H_INCLUDED -#if !defined (__AVX512BW__) || defined (__EVEX512__) +#if !defined (__AVX512BW__) #pragma GCC push_options -#pragma GCC target("avx512bw,no-evex512") +#pragma GCC target("avx512bw") #define __DISABLE_AVX512BW__ #endif /* __AVX512BW__ */ @@ -346,17 +346,6 @@ _kandn_mask64 (__mmask64 __A, __mmask64 __B) return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B); } -#ifdef __DISABLE_AVX512BW__ -#undef __DISABLE_AVX512BW__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BW__ */ - -#if !defined (__AVX512BW__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512bw,evex512") -#define __DISABLE_AVX512BW_512__ -#endif /* __AVX512BW_512__ */ - /* Internal data types for implementing the intrinsics. */ typedef short __v32hi __attribute__ ((__vector_size__ (64))); typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ @@ -3369,8 +3358,8 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N) #endif -#ifdef __DISABLE_AVX512BW_512__ -#undef __DISABLE_AVX512BW_512__ +#ifdef __DISABLE_AVX512BW__ +#undef __DISABLE_AVX512BW__ #pragma GCC pop_options #endif /* __DISABLE_AVX512BW_512__ */ diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h index 206cc49..5a92d25 100644 --- a/gcc/config/i386/avx512cdintrin.h +++ b/gcc/config/i386/avx512cdintrin.h @@ -30,7 +30,7 @@ #ifndef __AVX512CD__ #pragma GCC push_options -#pragma GCC target("avx512cd,evex512") +#pragma GCC target("avx512cd") #define __DISABLE_AVX512CD__ #endif /* __AVX512CD__ */ diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index 1d10225..a7766b5 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512DQINTRIN_H_INCLUDED #define _AVX512DQINTRIN_H_INCLUDED -#if !defined (__AVX512DQ__) || defined (__EVEX512__) +#if !defined (__AVX512DQ__) #pragma GCC push_options -#pragma GCC target("avx512dq,no-evex512") +#pragma GCC target("avx512dq") #define __DISABLE_AVX512DQ__ #endif /* __AVX512DQ__ */ @@ -639,17 +639,6 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) #endif -#ifdef __DISABLE_AVX512DQ__ -#undef __DISABLE_AVX512DQ__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512DQ__ */ - -#if !defined (__AVX512DQ__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512dq,evex512") -#define __DISABLE_AVX512DQ_512__ -#endif /* __AVX512DQ_512__ */ - extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_broadcast_f64x2 (__m128d __A) @@ -2897,9 +2886,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) #endif -#ifdef __DISABLE_AVX512DQ_512__ -#undef __DISABLE_AVX512DQ_512__ +#ifdef __DISABLE_AVX512DQ__ +#undef __DISABLE_AVX512DQ__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512DQ_512__ */ +#endif /* 
__DISABLE_AVX512DQ__ */ #endif /* _AVX512DQINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 9160787..4469f73 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512FINTRIN_H_INCLUDED #define _AVX512FINTRIN_H_INCLUDED -#if !defined (__AVX512F__) || defined (__EVEX512__) +#if !defined (__AVX512F__) #pragma GCC push_options -#pragma GCC target("avx512f,no-evex512") +#pragma GCC target("avx512f") #define __DISABLE_AVX512F__ #endif /* __AVX512F__ */ @@ -54,11 +54,12 @@ typedef enum _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ } _MM_MANTISSA_SIGN_ENUM; -/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms - from AVX2 or before. We need to add them to prevent target option mismatch - when calling AVX512 intrins implemented with these intrins under no-evex512 - function attribute. All AVX512 intrins calling those AVX2 intrins or - before will change their calls to these AVX512 version. */ +/* These _mm{,256}_avx512* intrins are initially duplicated from their + _mm{,256}_* forms from AVX2 or before. At that time, we need to add them + to prevent target option mismatch when calling AVX512 intrins implemented + with these intrins under no-evex512 function attribute. These intrins will + still be here to avoid huge changes. All AVX512 intrins calling those AVX2 + intrins or before have changed their calls to these AVX512 version. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avx512_undefined_ps (void) { @@ -3802,17 +3803,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) #endif -#ifdef __DISABLE_AVX512F__ -#undef __DISABLE_AVX512F__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512F__ */ - -#if !defined (__AVX512F__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512f,evex512") -#define __DISABLE_AVX512F_512__ -#endif /* __AVX512F_512__ */ - /* Internal data types for implementing the intrinsics. 
*/ typedef double __v8df __attribute__ ((__vector_size__ (64))); typedef float __v16sf __attribute__ ((__vector_size__ (64))); @@ -16609,9 +16599,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A) #undef __MM512_REDUCE_OP -#ifdef __DISABLE_AVX512F_512__ -#undef __DISABLE_AVX512F_512__ +#ifdef __DISABLE_AVX512F__ +#undef __DISABLE_AVX512F__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512F_512__ */ +#endif /* __DISABLE_AVX512F__ */ #endif /* _AVX512FINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index f158f87..471ec05 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512FP16INTRIN_H_INCLUDED #define _AVX512FP16INTRIN_H_INCLUDED -#if !defined (__AVX512FP16__) || defined (__EVEX512__) +#if !defined (__AVX512FP16__) #pragma GCC push_options -#pragma GCC target("avx512fp16,no-evex512") +#pragma GCC target("avx512fp16") #define __DISABLE_AVX512FP16__ #endif /* __AVX512FP16__ */ @@ -2852,17 +2852,6 @@ _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E) #define _mm_maskz_cmul_round_sch(U, A, B, R) \ _mm_maskz_fcmul_round_sch ((U), (A), (B), (R)) -#ifdef __DISABLE_AVX512FP16__ -#undef __DISABLE_AVX512FP16__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512FP16__ */ - -#if !defined (__AVX512FP16__) || !defined (__EVEX512__) -#pragma GCC push_options -#pragma GCC target("avx512fp16,evex512") -#define __DISABLE_AVX512FP16_512__ -#endif /* __AVX512FP16_512__ */ - typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64))); typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__)); typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \ @@ -7238,9 +7227,9 @@ _mm512_set1_pch (_Float16 _Complex __A) #define _mm512_maskz_cmul_round_pch(U, A, B, R) \ _mm512_maskz_fcmul_round_pch ((U), (A), (B), (R)) -#ifdef __DISABLE_AVX512FP16_512__ -#undef __DISABLE_AVX512FP16_512__ +#ifdef __DISABLE_AVX512FP16__ +#undef __DISABLE_AVX512FP16__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512FP16_512__ */ +#endif /* __DISABLE_AVX512FP16__ */ #endif /* _AVX512FP16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h index 59e6c88..cb98310 100644 --- a/gcc/config/i386/avx512fp16vlintrin.h +++ b/gcc/config/i386/avx512fp16vlintrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512FP16VLINTRIN_H_INCLUDED #define __AVX512FP16VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) #pragma GCC push_options -#pragma GCC target("avx512fp16,avx512vl,no-evex512") +#pragma GCC target("avx512fp16,avx512vl") #define __DISABLE_AVX512FP16VL__ #endif /* __AVX512FP16VL__ */ diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h index ed97350..56790c0 100644 --- a/gcc/config/i386/avx512ifmaintrin.h +++ b/gcc/config/i386/avx512ifmaintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512IFMAINTRIN_H_INCLUDED #define _AVX512IFMAINTRIN_H_INCLUDED -#if !defined (__AVX512IFMA__) || !defined (__EVEX512__) +#if !defined (__AVX512IFMA__) #pragma GCC push_options -#pragma GCC target("avx512ifma,evex512") +#pragma GCC target("avx512ifma") #define __DISABLE_AVX512IFMA__ #endif /* __AVX512IFMA__ */ diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h index 681bda3..6b849c8 100644 --- a/gcc/config/i386/avx512ifmavlintrin.h +++ 
b/gcc/config/i386/avx512ifmavlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512IFMAVLINTRIN_H_INCLUDED #define _AVX512IFMAVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) #pragma GCC push_options -#pragma GCC target("avx512ifma,avx512vl,no-evex512") +#pragma GCC target("avx512ifma,avx512vl") #define __DISABLE_AVX512IFMAVL__ #endif /* __AVX512IFMAVL__ */ diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h index f5515a8..e8bfe1d 100644 --- a/gcc/config/i386/avx512vbmi2intrin.h +++ b/gcc/config/i386/avx512vbmi2intrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VBMI2INTRIN_H_INCLUDED #define __AVX512VBMI2INTRIN_H_INCLUDED -#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__) +#if !defined(__AVX512VBMI2__) #pragma GCC push_options -#pragma GCC target("avx512vbmi2,evex512") +#pragma GCC target("avx512vbmi2") #define __DISABLE_AVX512VBMI2__ #endif /* __AVX512VBMI2__ */ diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h index e9857ba..5cdfebd 100644 --- a/gcc/config/i386/avx512vbmi2vlintrin.h +++ b/gcc/config/i386/avx512vbmi2vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED #define _AVX512VBMI2VLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) #pragma GCC push_options -#pragma GCC target("avx512vbmi2,avx512vl,no-evex512") +#pragma GCC target("avx512vbmi2,avx512vl") #define __DISABLE_AVX512VBMI2VL__ #endif /* __AVX512VBMIVL__ */ diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h index 901a2f7..5f5e342 100644 --- a/gcc/config/i386/avx512vbmiintrin.h +++ b/gcc/config/i386/avx512vbmiintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMIINTRIN_H_INCLUDED #define _AVX512VBMIINTRIN_H_INCLUDED -#if !defined (__AVX512VBMI__) || !defined (__EVEX512__) +#if !defined (__AVX512VBMI__) #pragma GCC push_options -#pragma GCC target("avx512vbmi,evex512") +#pragma GCC target("avx512vbmi") #define __DISABLE_AVX512VBMI__ #endif /* __AVX512VBMI__ */ diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h index 90cd590..037ea93 100644 --- a/gcc/config/i386/avx512vbmivlintrin.h +++ b/gcc/config/i386/avx512vbmivlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VBMIVLINTRIN_H_INCLUDED #define _AVX512VBMIVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) #pragma GCC push_options -#pragma GCC target("avx512vbmi,avx512vl,no-evex512") +#pragma GCC target("avx512vbmi,avx512vl") #define __DISABLE_AVX512VBMIVL__ #endif /* __AVX512VBMIVL__ */ diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 9f0a5b4..537e408 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLBWINTRIN_H_INCLUDED #define _AVX512VLBWINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512BW__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512bw,no-evex512") +#pragma GCC target("avx512vl,avx512bw") #define __DISABLE_AVX512VLBW__ #endif /* __AVX512VLBW__ */ diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h index 3b23d4a..5783dbe 100644 --- a/gcc/config/i386/avx512vldqintrin.h 
+++ b/gcc/config/i386/avx512vldqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLDQINTRIN_H_INCLUDED #define _AVX512VLDQINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512dq,no-evex512") +#pragma GCC target("avx512vl,avx512dq") #define __DISABLE_AVX512VLDQ__ #endif /* __AVX512VLDQ__ */ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 4451a1f..50930cd 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VLINTRIN_H_INCLUDED #define _AVX512VLINTRIN_H_INCLUDED -#if !defined (__AVX512VL__) || defined (__EVEX512__) +#if !defined (__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vl,no-evex512") +#pragma GCC target("avx512vl") #define __DISABLE_AVX512VL__ #endif /* __AVX512VL__ */ @@ -13650,7 +13650,7 @@ _mm256_permutex_pd (__m256d __X, const int __M) #if !defined (__AVX512CD__) || !defined (__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vl,avx512cd,no-evex512") +#pragma GCC target("avx512vl,avx512cd") #define __DISABLE_AVX512VLCD__ #endif diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h index 5d0eaff..fe7b663 100644 --- a/gcc/config/i386/avx512vnniintrin.h +++ b/gcc/config/i386/avx512vnniintrin.h @@ -28,9 +28,9 @@ #ifndef __AVX512VNNIINTRIN_H_INCLUDED #define __AVX512VNNIINTRIN_H_INCLUDED -#if !defined(__AVX512VNNI__) || !defined (__EVEX512__) +#if !defined(__AVX512VNNI__) #pragma GCC push_options -#pragma GCC target("avx512vnni,evex512") +#pragma GCC target("avx512vnni") #define __DISABLE_AVX512VNNI__ #endif /* __AVX512VNNI__ */ diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h index 7774bbd..01c3c91 100644 --- a/gcc/config/i386/avx512vnnivlintrin.h +++ b/gcc/config/i386/avx512vnnivlintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VNNIVLINTRIN_H_INCLUDED #define _AVX512VNNIVLINTRIN_H_INCLUDED -#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) || defined (__EVEX512__) +#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) #pragma GCC push_options -#pragma GCC target("avx512vnni,avx512vl,no-evex512") +#pragma GCC target("avx512vnni,avx512vl") #define __DISABLE_AVX512VNNIVL__ #endif /* __AVX512VNNIVL__ */ diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h index e170cf5..50f7ead 100644 --- a/gcc/config/i386/avx512vp2intersectintrin.h +++ b/gcc/config/i386/avx512vp2intersectintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED #define _AVX512VP2INTERSECTINTRIN_H_INCLUDED -#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__) +#if !defined(__AVX512VP2INTERSECT__) #pragma GCC push_options -#pragma GCC target("avx512vp2intersect,evex512") +#pragma GCC target("avx512vp2intersect") #define __DISABLE_AVX512VP2INTERSECT__ #endif /* __AVX512VP2INTERSECT__ */ diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h index afdd2da..3e0a8ab 100644 --- a/gcc/config/i386/avx512vp2intersectvlintrin.h +++ b/gcc/config/i386/avx512vp2intersectvlintrin.h @@ -28,10 +28,9 @@ #ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED #define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED -#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) \ - || defined (__EVEX512__) +#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) #pragma GCC 
push_options -#pragma GCC target("avx512vp2intersect,avx512vl,no-evex512") +#pragma GCC target("avx512vp2intersect,avx512vl") #define __DISABLE_AVX512VP2INTERSECTVL__ #endif /* __AVX512VP2INTERSECTVL__ */ diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h index 3357255..e4b89ea 100644 --- a/gcc/config/i386/avx512vpopcntdqintrin.h +++ b/gcc/config/i386/avx512vpopcntdqintrin.h @@ -28,9 +28,9 @@ #ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQINTRIN_H_INCLUDED -#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__) +#if !defined (__AVX512VPOPCNTDQ__) #pragma GCC push_options -#pragma GCC target("avx512vpopcntdq,evex512") +#pragma GCC target("avx512vpopcntdq") #define __DISABLE_AVX512VPOPCNTDQ__ #endif /* __AVX512VPOPCNTDQ__ */ diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h index 17d836f..8eb1d42 100644 --- a/gcc/config/i386/avx512vpopcntdqvlintrin.h +++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h @@ -28,10 +28,9 @@ #ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED -#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) \ - || defined (__EVEX512__) +#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512vpopcntdq,avx512vl,no-evex512") +#pragma GCC target("avx512vpopcntdq,avx512vl") #define __DISABLE_AVX512VPOPCNTDQVL__ #endif /* __AVX512VPOPCNTDQVL__ */ diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h index 3ddcbec..0a3173c 100644 --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_SEH #define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables) +#undef PREFERRED_STACK_BOUNDARY_DEFAULT +#define PREFERRED_STACK_BOUNDARY_DEFAULT \ + (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY) + /* Win64 with SEH cannot represent DRAP stack frames. Disable its use. Force the use of different mechanisms to allocate aligned local data. */ #undef MAX_STACK_ALIGNMENT #define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT) -/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions - may require 16-byte alignment. */ -#undef STACK_REALIGN_DEFAULT -#define STACK_REALIGN_DEFAULT TARGET_SSE - /* Support hooks for SEH. */ #undef TARGET_ASM_UNWIND_EMIT #define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit @@ -247,9 +246,10 @@ do { \ #undef ASM_OUTPUT_LABELREF #define ASM_OUTPUT_LABELREF(STREAM, NAME) \ do { \ + const char *prefix = ""; \ if ((NAME)[0] != FASTCALL_PREFIX) \ - fputs (user_label_prefix, (STREAM)); \ - fputs ((NAME), (STREAM)); \ + prefix = user_label_prefix; \ + ix86_asm_output_labelref ((STREAM), prefix, (NAME)); \ } while (0) /* This does much the same in memory rather than to a stream. */ diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 1ff05e5..fe71f55 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -374,33 +374,6 @@ detect_caches_intel (bool xeon_mp, unsigned max_level, #define has_feature(f) \ has_cpu_feature (&cpu_model, cpu_features2, f) -/* We will emit a warning when using AVX10.1 and AVX512 options with one - enabled and the other disabled. Add this function to avoid push "-mno-" - options under this scenario for -march=native. 
*/ - -bool check_avx512_features (__processor_model &cpu_model, - unsigned int (&cpu_features2)[SIZE_OF_CPU_FEATURES], - const enum processor_features feature) -{ - if (has_feature (FEATURE_AVX10_1_256) - && ((feature == FEATURE_AVX512F) - || (feature == FEATURE_AVX512CD) - || (feature == FEATURE_AVX512DQ) - || (feature == FEATURE_AVX512BW) - || (feature == FEATURE_AVX512VL) - || (feature == FEATURE_AVX512IFMA) - || (feature == FEATURE_AVX512VBMI) - || (feature == FEATURE_AVX512VBMI2) - || (feature == FEATURE_AVX512VNNI) - || (feature == FEATURE_AVX512VPOPCNTDQ) - || (feature == FEATURE_AVX512BITALG) - || (feature == FEATURE_AVX512FP16) - || (feature == FEATURE_AVX512BF16))) - return false; - - return true; -} - /* This will be called by the spec parser in gcc.cc when it sees a %:local_cpu_detect(args) construct. Currently it will be called with either "arch [32|64]" or "tune [32|64]" as argument @@ -627,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (has_feature (FEATURE_AVX512F)) { /* Assume Diamond Rapids. */ - if (has_feature (FEATURE_AMX_TRANSPOSE)) + if (has_feature (FEATURE_AMX_FP8)) cpu = "diamondrapids"; /* Assume Granite Rapids D. */ else if (has_feature (FEATURE_AMX_COMPLEX)) @@ -909,12 +882,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) options = concat (options, " ", isa_names_table[i].option, NULL); } - /* Never push -mno-avx10.1-{256,512} under -march=native to - avoid unnecessary warnings when building libraries. */ - else if (isa_names_table[i].feature != FEATURE_AVX10_1_256 - && isa_names_table[i].feature != FEATURE_AVX10_1 - && check_avx512_features (cpu_model, cpu_features2, - isa_names_table[i].feature)) + else options = concat (options, neg_option, isa_names_table[i].option + 2, NULL); } diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile index 528b34e..0e9e5fe 100755 --- a/gcc/config/i386/gcc-auto-profile +++ b/gcc/config/i386/gcc-auto-profile @@ -24,8 +24,16 @@ if [ "$1" = "--all" ] ; then shift fi -if ! grep -q Intel /proc/cpuinfo ; then - echo >&2 "Only Intel CPUs supported" +if grep -q AuthenticAMD /proc/cpuinfo ; then + vendor=AMD + if ! grep -q " brs" /proc/cpuinfo && ! 
grep -q amd_lbr_v2 /proc/cpuinfo ; then + echo >&2 "AMD CPU with brs (Zen 3) or amd_lbr_v2 (Zen 4+) feature is required" + exit 1 + fi +elif grep -q Intel /proc/cpuinfo ; then + vendor=Intel +else + echo >&2 "Only AMD and Intel CPUs supported" exit 1 fi @@ -33,7 +41,7 @@ if grep -q hypervisor /proc/cpuinfo ; then echo >&2 "Warning: branch profiling may not be functional in VMs" fi -case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo && +case `test $vendor = Intel && grep -E -q "^cpu family\s*: 6" /proc/cpuinfo && grep -E "^model\s*:" /proc/cpuinfo | head -n1` in model*:\ 46|\ model*:\ 30|\ @@ -82,6 +90,8 @@ model*:\ 126|\ model*:\ 167|\ model*:\ 140|\ model*:\ 141|\ +model*:\ 143|\ +model*:\ 207|\ model*:\ 106|\ model*:\ 108|\ model*:\ 173|\ @@ -89,15 +99,20 @@ model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;; model*:\ 134|\ model*:\ 150|\ model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;; -model*:\ 143|\ -model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;; -model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;; +model*:\ 190|\ +model*:\ 175|\ +model*:\ 182) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;; model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;; *) if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; then E=br_inst_retired.near_taken:p + elif perf list ex_ret_brn_tkn | grep -q ex_ret_brn_tkn ; then + E=ex_ret_brn_tkn:P$FLAGS + elif $vendor = Intel ; then +echo >&2 "Unknown Intel CPU. Run contrib/gen_autofdo_event.py --all --script to update script." + exit 1 else -echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script." +echo >&2 "AMD CPU without support for ex_ret_brn_tkn event" exit 1 fi ;; esac diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h index c7e21e6..bc433c2 100644 --- a/gcc/config/i386/gfniintrin.h +++ b/gcc/config/i386/gfniintrin.h @@ -297,9 +297,9 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B, #pragma GCC pop_options #endif /* __GFNIAVX512VLBW__ */ -#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__) +#if !defined(__GFNI__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("gfni,avx512f,evex512") +#pragma GCC target("gfni,avx512f") #define __DISABLE_GFNIAVX512F__ #endif /* __GFNIAVX512F__ */ @@ -341,9 +341,9 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) #pragma GCC pop_options #endif /* __GFNIAVX512F__ */ -#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__) +#if !defined(__GFNI__) || !defined(__AVX512BW__) #pragma GCC push_options -#pragma GCC target("gfni,avx512bw,evex512") +#pragma GCC target("gfni,avx512bw") #define __DISABLE_GFNIAVX512FBW__ #endif /* __GFNIAVX512FBW__ */ diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc index e083f49..87804a5 100644 --- a/gcc/config/i386/host-mingw32.cc +++ b/gcc/config/i386/host-mingw32.cc @@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd, and earlier, backslashes are invalid in object name. So, we need to check if we are on Windows2000 or higher. */ OSVERSIONINFO version_info; - int r; version_info.dwOSVersionInfoSize = sizeof (version_info); @@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd, return -1; } - /* Retry five times, as here might occure a race with multiple gcc's - instances at same time. 
*/ - for (r = 0; r < 5; r++) - { - mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, - size, addr); - if (mmap_addr == addr) - break; - if (r != 4) - Sleep (500); - } - - if (mmap_addr != addr) + /* Try mapping the file at `addr`. */ + mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, + size, addr); + if (mmap_addr == NULL) { - w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx"); - CloseHandle(mmap_handle); - return -1; + /* We could not map the file at its original address, so let the + system choose a different one. The PCH can be relocated later. */ + mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset, + size, NULL); + if (mmap_addr == NULL) + { + w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx"); + CloseHandle(mmap_handle); + return -1; + } } + addr = mmap_addr; return 1; } diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index a142711..fe42c6436 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -204,53 +204,53 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstored256, "__builtin_ia32_mas BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI) /* AVX512F */ -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, 
"__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, 
"__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) -BDESC 
(OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, 
UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI) @@ -297,14 +297,14 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si, BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT) /* AVX512BW */ -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) 
VOID_FTYPE_PCHAR_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI) /* AVX512VP2INTERSECT */ -BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI) -BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI) +BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI) +BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI) @@ -411,9 +411,9 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, 
(int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI) /* AVX512FP16 */ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI) @@ -434,17 +434,17 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, 
(int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) @@ -1384,230 +1384,230 @@ BDESC (OPTION_MASK_ISA_BMI2, 0, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si" BDESC (OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64) /* AVX512F */ -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, 
OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF) +BDESC 
(OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, 
"__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv16si3_mask, 
"__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", 
IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) 
V8HI_FTYPE_V8DI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, 
(int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC 
(OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", 
IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) 
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, 
"__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, 
"__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) 
V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, 
UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) 
V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) 
V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", 
IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512F, 
0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", 
IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", 
IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND) /* Mask arithmetic operations */ BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST) @@ -2433,136 +2433,136 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI) /* AVX512DQ. 
*/ -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask_1, 
"__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, 
CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI) /* AVX512BW. */ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv64qi_mask, 
"__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", 
IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", 
IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", 
IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) -BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv64qi_mask, 
"__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) 
V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, 
CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI) +BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI) /* AVX512IFMA */ -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, 
(int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2577,13 +2577,13 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) /* AVX512VBMI */ -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512,
CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) @@ -2594,16 +2594,16 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512 BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, 
(int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) @@ -2612,64 +2612,64 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", 
IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | 
OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi_mask, "__builtin_ia32_vpshld_v8hi_mask", IX86_BUILTIN_VPSHLDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si, "__builtin_ia32_vpshld_v8si", IX86_BUILTIN_VPSHLDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si_mask, "__builtin_ia32_vpshld_v8si_mask", IX86_BUILTIN_VPSHLDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si, 
"__builtin_ia32_vpshld_v4si", IX86_BUILTIN_VPSHLDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si_mask, "__builtin_ia32_vpshld_v4si_mask", IX86_BUILTIN_VPSHLDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di, "__builtin_ia32_vpshld_v4di", IX86_BUILTIN_VPSHLDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di_mask, "__builtin_ia32_vpshld_v4di_mask", IX86_BUILTIN_VPSHLDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di, "__builtin_ia32_vpshld_v2di", IX86_BUILTIN_VPSHLDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, 
UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", 
IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2677,27 +2677,27 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC 
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) 
V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) @@ -2706,20 +2706,20 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) /* GFNI */ -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineinvqb_v32qi, "__builtin_ia32_vgf2p8affineinvqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v32qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineinvqb_v16qi, "__builtin_ia32_vgf2p8affineinvqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineinvqb_v16qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, 
UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineqb_v32qi, "__builtin_ia32_vgf2p8affineqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v32qi_mask, "__builtin_ia32_vgf2p8affineqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineqb_v16qi, "__builtin_ia32_vgf2p8affineqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineqb_v16qi_mask, "__builtin_ia32_vgf2p8affineqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8mulb_v32qi, "__builtin_ia32_vgf2p8mulb_v32qi", IX86_BUILTIN_VGF2P8MULB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v32qi_mask, "__builtin_ia32_vgf2p8mulb_v32qi_mask", IX86_BUILTIN_VGF2P8MULB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) @@ -2727,9 +2727,9 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v /* AVX512_VNNI */ -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", 
IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2737,9 +2737,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", 
IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2747,9 +2747,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2757,9 +2757,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) 
V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) @@ -2798,13 +2798,13 @@ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16 | OPTION_MASK_ISA2_AVX10_2, CODE_FOR_vpd /* VPCLMULQDQ */ BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) -BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) /* VPOPCNTDQ */ -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) -BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI) +BDESC 
(OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI) @@ -2816,21 +2816,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI) /* BITALG */ -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", 
IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI) @@ -2840,39 +2840,39 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B /* VAES. */ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, 
OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) /* BF16 */ -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf, "__builtin_ia32_cvtne2ps2bf16_v16bf", IX86_BUILTIN_CVTNE2PS2BF16_V16BF, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_mask, "__builtin_ia32_cvtne2ps2bf16_v16bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASK, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_V16BF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v16bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf, "__builtin_ia32_cvtne2ps2bf16_v8bf", IX86_BUILTIN_CVTNE2PS2BF16_V8BF, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_mask, "__builtin_ia32_cvtne2ps2bf16_v8bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASK, UNKNOWN, (int) 
V8BF_FTYPE_V4SF_V4SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v8bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2BF16_V8SF, UNKNOWN, (int) V8BF_FTYPE_V8SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V8SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V8SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2BF16_V4SF, UNKNOWN, (int) V8BF_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V4SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V8BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V4SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) +BDESC (0, 
OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) +BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPBF16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPBF16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPBF16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI) @@ -2885,40 +2885,40 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_extendbfsf2_1, "__builtin_ia32_cvtbf2sf /* AVX512FP16. */ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", 
IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, 
CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI) @@ -2928,7 +2928,7 @@ BDESC 
(OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1 BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI) BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI) @@ -3366,26 +3366,26 @@ BDESC_END (ARGS, ROUND_ARGS) /* AVX512F. */ BDESC_FIRST (round_args, ROUND_ARGS, - OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) + OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, 
(int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, 
"__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT) @@ -3393,72 +3393,72 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) 
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd_mask_round", IX86_BUILTIN_GETEXPSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss_mask_round", IX86_BUILTIN_GETEXPSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_mask_round, "__builtin_ia32_getmantsd_mask_round", IX86_BUILTIN_GETMANTSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_mask_round, "__builtin_ia32_getmantss_mask_round", IX86_BUILTIN_GETMANTSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_round, 
"__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 
0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) @@ -3479,12 +3479,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_sse_cvttss2si_round, "__built BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", 
IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) @@ -3495,100 +3495,100 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, 
(int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, 
(int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, 
CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC 
(OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) /* AVX512DQ. */ -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, 
CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT) /* AVX512FP16. */ -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, 
CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | 
OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", 
IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, 
"__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) 
INT64_FTYPE_V8HF_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) @@ -3601,32 +3601,32 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) 
V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) 
V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, 
"__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) @@ -3634,18 +3634,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, 
"__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) -BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, 
"__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fma_fcmaddcsh_v8hf_round, "__builtin_ia32_vfcmaddcsh_round", IX86_BUILTIN_VFCMADDCSH_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask1_round, "__builtin_ia32_vfcmaddcsh_mask_round", IX86_BUILTIN_VFCMADDCSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask3_round, "__builtin_ia32_vfcmaddcsh_mask3_round", IX86_BUILTIN_VFCMADDCSH_MASK3_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 2e7381b..4835b94 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -801,102 +801,102 @@ ix86_init_mmx_sse_builtins (void) IX86_BUILTIN_GATHERALTDIV8SI); /* AVX512F */ - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, IX86_BUILTIN_GATHER3SIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, IX86_BUILTIN_GATHER3SIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, IX86_BUILTIN_GATHER3SIV16SI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, IX86_BUILTIN_GATHER3SIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV16SI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, IX86_BUILTIN_GATHER3DIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, 
"__builtin_ia32_gather3altdiv16sf ", V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SF); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, IX86_BUILTIN_GATHER3ALTSIV8DI); - def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, IX86_BUILTIN_GATHER3ALTDIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, IX86_BUILTIN_SCATTERSIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, IX86_BUILTIN_SCATTERSIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, IX86_BUILTIN_SCATTERDIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, IX86_BUILTIN_SCATTERDIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, IX86_BUILTIN_SCATTERSIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, IX86_BUILTIN_SCATTERSIV8DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, IX86_BUILTIN_SCATTERDIV16SI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, IX86_BUILTIN_SCATTERDIV8DI); @@ -1046,22 +1046,22 @@ ix86_init_mmx_sse_builtins (void) VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, IX86_BUILTIN_SCATTERDIV2DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, IX86_BUILTIN_SCATTERALTSIV8DF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, IX86_BUILTIN_SCATTERALTDIV16SF); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, IX86_BUILTIN_SCATTERALTSIV8DI); - def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, + def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, IX86_BUILTIN_SCATTERALTDIV16SI); @@ -1676,7 +1676,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype, enum ix86_builtins code; const machine_mode mode = TYPE_MODE (TREE_TYPE (mem_vectype)); - if ((!TARGET_AVX512F || !TARGET_EVEX512) && GET_MODE_SIZE (mode) == 64) + if (!TARGET_AVX512F && GET_MODE_SIZE (mode) == 64) return NULL_TREE; if (! 
TARGET_AVX2 diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 0a320ca..457aa05 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -729,12 +729,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__SHA512__"); if (isa_flag2 & OPTION_MASK_ISA2_SM4) def_or_undef (parse_in, "__SM4__"); - if (isa_flag2 & OPTION_MASK_ISA2_EVEX512) - def_or_undef (parse_in, "__EVEX512__"); if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR) def_or_undef (parse_in, "__USER_MSR__"); - if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256) - def_or_undef (parse_in, "__AVX10_1_256__"); if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1) def_or_undef (parse_in, "__AVX10_1__"); if (isa_flag2 & OPTION_MASK_ISA2_APX_F) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..8f15c1c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) too common scenario. */ start_sequence (); compare_op = ix86_expand_fp_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); @@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[]) start_sequence (); compare_op = ix86_expand_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); + compare_seq = end_sequence (); compare_code = GET_CODE (compare_op); @@ -3611,7 +3609,11 @@ ix86_expand_int_movcc (rtx operands[]) negate_cc_compare_p = true; } - diff = ct - cf; + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; + /* Sign bit compares are better done using shifts than we do by using sbb. */ if (sign_bit_compare_p @@ -3669,7 +3671,12 @@ ix86_expand_int_movcc (rtx operands[]) PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); } - diff = ct - cf; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; if (reg_overlap_mentioned_p (out, compare_op)) tmp = gen_reg_rtx (mode); @@ -3687,7 +3694,12 @@ ix86_expand_int_movcc (rtx operands[]) else { std::swap (ct, cf); - diff = ct - cf; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. */ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; } tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); } @@ -3754,9 +3766,15 @@ ix86_expand_int_movcc (rtx operands[]) tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); } + HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct; + /* Make sure we can represent the difference + between the two values. */ + if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf)) + return false; + tmp = expand_simple_binop (mode, AND, copy_rtx (tmp), - gen_int_mode (cf - ct, mode), + gen_int_mode (ival, mode), copy_rtx (tmp), 1, OPTAB_DIRECT); if (ct) tmp = expand_simple_binop (mode, PLUS, @@ -3793,7 +3811,13 @@ ix86_expand_int_movcc (rtx operands[]) if (new_code != UNKNOWN) { std::swap (ct, cf); - diff = -diff; + + diff = (unsigned HOST_WIDE_INT) ct - cf; + /* Make sure we can represent the difference + between the two values. 
*/ + if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct)) + return false; + code = new_code; } } @@ -3996,8 +4020,14 @@ ix86_expand_int_movcc (rtx operands[]) copy_rtx (out), 1, OPTAB_DIRECT); } + HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct; + /* Make sure we can represent the difference + between the two values. */ + if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf)) + return false; + out = expand_simple_binop (mode, AND, copy_rtx (out), - gen_int_mode (cf - ct, mode), + gen_int_mode (ival, mode), copy_rtx (out), 1, OPTAB_DIRECT); if (ct) out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), @@ -4138,6 +4168,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return false; mode = GET_MODE (dest); + if (immediate_operand (if_false, mode)) + if_false = force_reg (mode, if_false); + if (immediate_operand (if_true, mode)) + if_true = force_reg (mode, if_true); /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. */ @@ -4186,7 +4220,7 @@ ix86_valid_mask_cmp_mode (machine_mode mode) if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW) return false; - return (vector_size == 64 && TARGET_EVEX512) || TARGET_AVX512VL; + return vector_size == 64 || TARGET_AVX512VL; } /* Return true if integer mask comparison should be used. */ @@ -4687,6 +4721,8 @@ ix86_expand_fp_movcc (rtx operands[]) compare_op = ix86_expand_compare (NE, tmp, const0_rtx); } + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (mode, compare_op, operands[2], operands[3]))); @@ -5022,7 +5058,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 /* Don't do it if not using integer masks and we'd end up with the right values in the registers though. */ - && ((GET_MODE_SIZE (mode) == 64 && TARGET_EVEX512) + && (GET_MODE_SIZE (mode) == 64 || !vector_all_ones_operand (optrue, data_mode) || opfalse != CONST0_RTX (data_mode)))) { @@ -7863,7 +7899,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, rtx count, machine_mode mode, int unroll, int expected_size, bool issetmem) { - rtx_code_label *out_label, *top_label; + rtx_code_label *out_label = nullptr; + rtx_code_label *top_label = nullptr; rtx iter, tmp; machine_mode iter_mode = counter_mode (count); int piece_size_n = GET_MODE_SIZE (mode) * unroll; @@ -7871,9 +7908,19 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); rtx size; int i; + int loop_count; - top_label = gen_label_rtx (); - out_label = gen_label_rtx (); + if (expected_size != -1 && CONST_INT_P (count)) + loop_count = INTVAL (count) / GET_MODE_SIZE (mode) / unroll; + else + loop_count = -1; + + /* Don't generate the loop if the loop count is 1. 
*/ + if (loop_count != 1) + { + top_label = gen_label_rtx (); + out_label = gen_label_rtx (); + } iter = gen_reg_rtx (iter_mode); size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, @@ -7887,7 +7934,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, } emit_move_insn (iter, const0_rtx); - emit_label (top_label); + if (loop_count != 1) + emit_label (top_label); tmp = convert_modes (Pmode, iter_mode, iter, true); @@ -7955,21 +8003,25 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, if (tmp != iter) emit_move_insn (iter, tmp); - emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, - true, top_label); - if (expected_size != -1) + if (loop_count != 1) { - expected_size /= GET_MODE_SIZE (mode) * unroll; - if (expected_size == 0) - predict_jump (0); - else if (expected_size > REG_BR_PROB_BASE) - predict_jump (REG_BR_PROB_BASE - 1); + emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, + true, top_label); + if (expected_size != -1) + { + expected_size /= GET_MODE_SIZE (mode) * unroll; + if (expected_size == 0) + predict_jump (0); + else if (expected_size > REG_BR_PROB_BASE) + predict_jump (REG_BR_PROB_BASE - 1); + else + predict_jump (REG_BR_PROB_BASE + - (REG_BR_PROB_BASE + expected_size / 2) + / expected_size); + } else - predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) - / expected_size); + predict_jump (REG_BR_PROB_BASE * 80 / 100); } - else - predict_jump (REG_BR_PROB_BASE * 80 / 100); iter = ix86_zero_extend_to_Pmode (iter); tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, true, OPTAB_LIB_WIDEN); @@ -7982,7 +8034,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, if (tmp != srcptr) emit_move_insn (srcptr, tmp); } - emit_label (out_label); + if (loop_count != 1) + emit_label (out_label); } /* Divide COUNTREG by SCALE. */ @@ -8185,19 +8238,11 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem, rtx src, dest; if (CONST_INT_P (count)) { - HOST_WIDE_INT countval = INTVAL (count); - HOST_WIDE_INT epilogue_size = countval % max_size; - int i; - - /* For now MAX_SIZE should be a power of 2. This assert could be - relaxed, but it'll require a bit more complicated epilogue - expanding. */ - gcc_assert ((max_size & (max_size - 1)) == 0); - for (i = max_size; i >= 1; i >>= 1) - { - if (epilogue_size & i) - destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); - } + unsigned HOST_WIDE_INT countval = UINTVAL (count); + unsigned HOST_WIDE_INT epilogue_size = countval % max_size; + unsigned int destalign = MEM_ALIGN (destmem); + move_by_pieces (destmem, srcmem, epilogue_size, destalign, + RETURN_BEGIN); return; } if (max_size > 8) @@ -8358,6 +8403,81 @@ expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, 1, max_size / 2, true); } +/* Callback routine for store_by_pieces. Return the RTL of a register + containing GET_MODE_SIZE (MODE) bytes in the RTL register op_p which + is a word or a word vector register. If PREV_P isn't nullptr, it + has the RTL info from the previous iteration. 
*/ + +static rtx +setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT, + fixed_size_mode mode) +{ + rtx target; + by_pieces_prev *prev = (by_pieces_prev *) prev_p; + if (prev) + { + rtx prev_op = prev->data; + if (prev_op) + { + machine_mode prev_mode = GET_MODE (prev_op); + if (prev_mode == mode) + return prev_op; + if (VECTOR_MODE_P (prev_mode) + && VECTOR_MODE_P (mode) + && GET_MODE_INNER (prev_mode) == GET_MODE_INNER (mode)) + { + target = gen_rtx_SUBREG (mode, prev_op, 0); + return target; + } + } + } + + rtx op = (rtx) op_p; + machine_mode op_mode = GET_MODE (op); + + gcc_assert (op_mode == word_mode + || (VECTOR_MODE_P (op_mode) + && GET_MODE_INNER (op_mode) == word_mode)); + + if (VECTOR_MODE_P (mode)) + { + gcc_assert (GET_MODE_INNER (mode) == QImode); + + unsigned int op_size = GET_MODE_SIZE (op_mode); + unsigned int size = GET_MODE_SIZE (mode); + unsigned int nunits = op_size / GET_MODE_SIZE (QImode); + machine_mode vec_mode + = mode_for_vector (QImode, nunits).require (); + target = gen_reg_rtx (vec_mode); + op = gen_rtx_SUBREG (vec_mode, op, 0); + emit_move_insn (target, op); + if (op_size == size) + return target; + + rtx tmp = gen_reg_rtx (mode); + target = gen_rtx_SUBREG (mode, target, 0); + emit_move_insn (tmp, target); + return tmp; + } + + target = gen_reg_rtx (word_mode); + if (VECTOR_MODE_P (op_mode)) + { + op = gen_rtx_SUBREG (word_mode, op, 0); + emit_move_insn (target, op); + } + else + target = op; + + if (mode == word_mode) + return target; + + rtx tmp = gen_reg_rtx (mode); + target = gen_rtx_SUBREG (mode, target, 0); + emit_move_insn (tmp, target); + return tmp; +} + /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ static void expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, @@ -8367,24 +8487,12 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, if (CONST_INT_P (count)) { - HOST_WIDE_INT countval = INTVAL (count); - HOST_WIDE_INT epilogue_size = countval % max_size; - int i; - - /* For now MAX_SIZE should be a power of 2. This assert could be - relaxed, but it'll require a bit more complicated epilogue - expanding. */ - gcc_assert ((max_size & (max_size - 1)) == 0); - for (i = max_size; i >= 1; i >>= 1) - { - if (epilogue_size & i) - { - if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) - destmem = emit_memset (destmem, destptr, vec_value, i); - else - destmem = emit_memset (destmem, destptr, value, i); - } - } + unsigned HOST_WIDE_INT countval = UINTVAL (count); + unsigned HOST_WIDE_INT epilogue_size = countval % max_size; + unsigned int destalign = MEM_ALIGN (destmem); + store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val, + vec_value ? vec_value : value, destalign, true, + RETURN_BEGIN); return; } if (max_size > 32) @@ -8516,6 +8624,7 @@ expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem, rtx_code_label *label = ix86_expand_aligntest (count, size, false); machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); rtx modesize; + rtx scalar_value = value; int n; /* If we do not have vector value to copy, we must reduce size. */ @@ -8535,11 +8644,57 @@ expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem, { /* Choose appropriate vector mode. */ if (size >= 32) - mode = TARGET_AVX ? V32QImode : TARGET_SSE ? 
V16QImode : DImode; + switch (MOVE_MAX) + { + case 64: + if (size >= 64) + { + mode = V64QImode; + break; + } + /* FALLTHRU */ + case 32: + mode = V32QImode; + break; + case 16: + mode = V16QImode; + break; + case 8: + mode = DImode; + break; + default: + gcc_unreachable (); + } else if (size >= 16) mode = TARGET_SSE ? V16QImode : DImode; srcmem = change_address (srcmem, mode, srcptr); } + if (issetmem && vec_value && GET_MODE_SIZE (mode) > size) + { + /* For memset with vector and the size is smaller than the vector + size, first try the narrower vector, otherwise, use the + original value. */ + machine_mode inner_mode = GET_MODE_INNER (mode); + unsigned int nunits = size / GET_MODE_SIZE (inner_mode); + if (nunits > 1) + { + mode = mode_for_vector (GET_MODE_INNER (mode), + nunits).require (); + value = gen_rtx_SUBREG (mode, value, 0); + } + else + { + scalar_int_mode smode + = smallest_int_mode_for_size (size * BITS_PER_UNIT).require (); + gcc_assert (GET_MODE_SIZE (GET_MODE (scalar_value)) + >= GET_MODE_SIZE (smode)); + mode = smode; + if (GET_MODE (scalar_value) == mode) + value = scalar_value; + else + value = gen_rtx_SUBREG (mode, scalar_value, 0); + } + } destmem = change_address (destmem, mode, destptr); modesize = GEN_INT (GET_MODE_SIZE (mode)); gcc_assert (GET_MODE_SIZE (mode) <= size); @@ -8901,31 +9056,34 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, /* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool -alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) +alg_usable_p (enum stringop_alg alg, bool memset, + addr_space_t dst_as, addr_space_t src_as) { if (alg == no_stringop) return false; /* It is not possible to use a library call if we have non-default address space. We can do better than the generic byte-at-a-time loop, used as a fallback. */ - if (alg == libcall && have_as) + if (alg == libcall && + !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as))) return false; if (alg == vector_loop) return TARGET_SSE || TARGET_AVX; /* Algorithms using the rep prefix want at least edi and ecx; additionally, memset wants eax and memcpy wants esi. Don't consider such algorithms if the user has appropriated those - registers for their own purposes, or if we have a non-default - address space, since some string insns cannot override the segment. */ + registers for their own purposes, or if we have the destination + in the non-default address space, since string insns cannot + override the destination segment. */ if (alg == rep_prefix_1_byte || alg == rep_prefix_4_byte || alg == rep_prefix_8_byte) { - if (have_as) - return false; if (fixed_regs[CX_REG] || fixed_regs[DI_REG] - || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) + || (memset ? 
fixed_regs[AX_REG] : fixed_regs[SI_REG]) + || !ADDR_SPACE_GENERIC_P (dst_as) + || !(ADDR_SPACE_GENERIC_P (src_as) || Pmode == word_mode)) return false; } return true; @@ -8935,8 +9093,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) static enum stringop_alg decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - bool memset, bool zero_memset, bool have_as, - int *dynamic_check, bool *noalign, bool recur) + bool memset, bool zero_memset, addr_space_t dst_as, + addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur) { const struct stringop_algs *algs; bool optimize_for_speed; @@ -8968,7 +9126,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, for (i = 0; i < MAX_STRINGOP_ALGS; i++) { enum stringop_alg candidate = algs->size[i].alg; - bool usable = alg_usable_p (candidate, memset, have_as); + bool usable = alg_usable_p (candidate, memset, dst_as, src_as); any_alg_usable_p |= usable; if (candidate != libcall && candidate && usable) @@ -8984,17 +9142,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If user specified the algorithm, honor it if possible. */ if (ix86_stringop_alg != no_stringop - && alg_usable_p (ix86_stringop_alg, memset, have_as)) + && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as)) return ix86_stringop_alg; /* rep; movq or rep; movl is the smallest variant. */ else if (!optimize_for_speed) { *noalign = true; if (!count || (count & 3) || (memset && !zero_memset)) - return alg_usable_p (rep_prefix_1_byte, memset, have_as) + return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as) ? rep_prefix_1_byte : loop_1_byte; else - return alg_usable_p (rep_prefix_4_byte, memset, have_as) + return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as) ? rep_prefix_4_byte : loop; } /* Very tiny blocks are best handled via the loop, REP is expensive to @@ -9018,7 +9176,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, enum stringop_alg candidate = algs->size[i].alg; if (candidate != libcall - && alg_usable_p (candidate, memset, have_as)) + && alg_usable_p (candidate, memset, dst_as, src_as)) { alg = candidate; alg_noalign = algs->size[i].noalign; @@ -9038,7 +9196,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, else if (!any_alg_usable_p) break; } - else if (alg_usable_p (candidate, memset, have_as) + else if (alg_usable_p (candidate, memset, dst_as, src_as) && !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB && candidate == rep_prefix_1_byte /* NB: If min_size != max_size, size is @@ -9060,7 +9218,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, choice in ix86_costs. */ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) && (algs->unknown_size == libcall - || !alg_usable_p (algs->unknown_size, memset, have_as))) + || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as))) { enum stringop_alg alg; HOST_WIDE_INT new_expected_size = (max > 0 ? 
max : 4096) / 2; @@ -9075,8 +9233,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - alg = decide_alg (count, new_expected_size, min_size, max_size, memset, - zero_memset, have_as, dynamic_check, noalign, true); + alg = decide_alg (count, new_expected_size, min_size, max_size, + memset, zero_memset, dst_as, src_as, + dynamic_check, noalign, true); gcc_assert (*dynamic_check == -1); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; @@ -9088,7 +9247,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* Try to use some reasonable fallback algorithm. Note that for non-default address spaces we default to a loop instead of a libcall. */ - return (alg_usable_p (algs->unknown_size, memset, have_as) + + bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as) + && ADDR_SPACE_GENERIC_P (src_as)); + + return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as) ? algs->unknown_size : have_as ? loop : libcall); } @@ -9135,13 +9298,26 @@ decide_alignment (int align, static rtx promote_duplicated_reg (machine_mode mode, rtx val) { + if (val == const0_rtx) + return copy_to_mode_reg (mode, CONST0_RTX (mode)); + machine_mode valmode = GET_MODE (val); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* Duplicate the scalar value for integer vector. */ + gcc_assert ((val == const0_rtx || val == constm1_rtx) + || GET_MODE_INNER (mode) == valmode); + rtx dup = gen_reg_rtx (mode); + bool ok = ix86_expand_vector_init_duplicate (false, mode, dup, + val); + gcc_assert (ok); + return dup; + } + rtx tmp; int nops = mode == DImode ? 3 : 2; - gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); - if (val == const0_rtx) - return copy_to_mode_reg (mode, CONST0_RTX (mode)); + gcc_assert (mode == SImode || mode == DImode); if (CONST_INT_P (val)) { HOST_WIDE_INT v = INTVAL (val) & 255; @@ -9307,14 +9483,13 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, bool need_zero_guard = false; bool noalign; machine_mode move_mode = VOIDmode; - machine_mode wider_mode; int unroll_factor = 1; /* TODO: Once value ranges are available, fill in proper data. */ unsigned HOST_WIDE_INT min_size = 0; unsigned HOST_WIDE_INT max_size = -1; unsigned HOST_WIDE_INT probable_max_size = -1; bool misaligned_prologue_used = false; - bool have_as; + addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC; if (CONST_INT_P (align_exp)) align = INTVAL (align_exp); @@ -9352,16 +9527,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, if (count > (HOST_WIDE_INT_1U << 30)) return false; - have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); + dst_as = MEM_ADDR_SPACE (dst); if (!issetmem) - have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); + src_as = MEM_ADDR_SPACE (src); /* Step 0: Decide on preferred algorithm, desired alignment and size of chunks to be copied by main loop. */ alg = decide_alg (count, expected_size, min_size, probable_max_size, - issetmem, - issetmem && val_exp == const0_rtx, have_as, - &dynamic_check, &noalign, false); + issetmem, issetmem && val_exp == const0_rtx, + dst_as, src_as, &dynamic_check, &noalign, false); if (dump_file) fprintf (dump_file, "Selected stringop expansion strategy: %s\n", @@ -9371,11 +9545,6 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, return false; gcc_assert (alg != no_stringop); - /* For now vector-version of memset is generated only for memory zeroing, as - creating of promoted vector value is very cheap in this case. 
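A minimal sketch of what the extended promote_duplicated_reg allows (illustration only): the memset byte is first replicated across a scalar word, and for vector_loop that scalar is then vec_duplicated into every lane, so vector memset no longer has to be limited to zeroing.

#include <stdint.h>

/* Scalar side of the promotion: 0xAB -> 0xABABABABABABABAB.  The
   vector side then broadcasts this word (or the original byte) into
   every element of the chosen vector mode.  */
static uint64_t
promote_byte (uint8_t c)
{
  return (uint64_t) c * UINT64_C (0x0101010101010101);
}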
*/ - if (issetmem && alg == vector_loop && val_exp != const0_rtx) - alg = unrolled_loop; - if (!count) count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); @@ -9384,6 +9553,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, unroll_factor = 1; move_mode = word_mode; + int nunits; switch (alg) { case libcall: @@ -9404,27 +9574,14 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, case vector_loop: need_zero_guard = true; unroll_factor = 4; - /* Find the widest supported mode. */ - move_mode = word_mode; - while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) - && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) - move_mode = wider_mode; - - if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128) - move_mode = TImode; - if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256) - move_mode = OImode; - - /* Find the corresponding vector mode with the same size as MOVE_MODE. - MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ - if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) + /* Get the vector mode to move MOVE_MAX bytes. */ + nunits = MOVE_MAX / GET_MODE_SIZE (word_mode); + if (nunits > 1) { - int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); - if (!mode_for_vector (word_mode, nunits).exists (&move_mode) - || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) - move_mode = word_mode; + move_mode = mode_for_vector (word_mode, nunits).require (); + gcc_assert (optab_handler (mov_optab, move_mode) + != CODE_FOR_nothing); } - gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); break; case rep_prefix_8_byte: move_mode = DImode; @@ -9480,20 +9637,41 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, && ((desired_align > align && !align_bytes) || (!count && epilogue_size_needed > 1))); + /* Destination is aligned after the misaligned prologue. */ + bool aligned_dstmem = misaligned_prologue_used; + + if (noalign && !misaligned_prologue_used) + { + /* Also use misaligned prologue if alignment isn't needed and + destination isn't aligned. Since alignment isn't needed, + the destination after prologue won't be aligned. */ + aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode) + <= MEM_ALIGN (dst)); + if (!aligned_dstmem) + misaligned_prologue_used = true; + } + /* Do the cheap promotion to allow better CSE across the main loop and epilogue (ie one load of the big constant in the front of all code. For now the misaligned move sequences do not have fast path without broadcasting. */ - if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) + if (issetmem + && (alg == vector_loop + || CONST_INT_P (val_exp) + || misaligned_prologue_used)) { if (alg == vector_loop) { - gcc_assert (val_exp == const0_rtx); - vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); promoted_val = promote_duplicated_reg_to_size (val_exp, GET_MODE_SIZE (word_mode), desired_align, align); + /* Duplicate the promoted scalar value if not 0 nor -1. */ + vec_promoted_val + = promote_duplicated_reg (move_mode, + (val_exp == const0_rtx + || val_exp == constm1_rtx) + ? 
val_exp : promoted_val); } else { @@ -9518,7 +9696,8 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, if (!issetmem) src = change_address (src, BLKmode, srcreg); dst = change_address (dst, BLKmode, destreg); - set_mem_align (dst, desired_align * BITS_PER_UNIT); + if (aligned_dstmem) + set_mem_align (dst, desired_align * BITS_PER_UNIT); epilogue_size_needed = 0; if (need_zero_guard && min_size < (unsigned HOST_WIDE_INT) size_needed) @@ -10108,9 +10287,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, if (lookup_attribute ("interrupt", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) error ("interrupt service routine cannot be called directly"); - else if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + else if (ix86_type_no_callee_saved_registers_p (TREE_TYPE (fndecl))) call_no_callee_saved_registers = true; + if (fndecl == current_function_decl + && decl_binds_to_current_def_p (fndecl)) + cfun->machine->recursive_function = true; } } else @@ -10120,8 +10301,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, tree mem_expr = MEM_EXPR (fnaddr); if (mem_expr != nullptr && TREE_CODE (mem_expr) == MEM_REF - && lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (mem_expr)))) + && ix86_type_no_callee_saved_registers_p (TREE_TYPE (mem_expr))) call_no_callee_saved_registers = true; } @@ -10346,6 +10526,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (!fixed_regs[i] + && i != HARD_FRAME_POINTER_REGNUM && !(ix86_call_used_regs[i] == 1 || (ix86_call_used_regs[i] & c_mask)) && !STACK_REGNO_P (i) @@ -11244,6 +11425,54 @@ fixup_modeless_constant (rtx x, machine_mode mode) return x; } +/* Expand the outgoing argument ARG to extract unsigned char and short + integer constants suitable for the predicates and the instruction + templates which expect the unsigned expanded value. */ + +static rtx +ix86_expand_unsigned_small_int_cst_argument (tree arg) +{ + /* When passing 0xff as an unsigned char function argument with the + C frontend promotion, expand_normal gets + + <integer_cst 0x7fffe6aa23a8 type <integer_type 0x7fffe98225e8 int> constant 255> + + and returns the rtx value using the sign-extended representation: + + (const_int 255 [0xff]) + + Without the C frontend promotion, expand_normal gets + + <integer_cst 0x7fffe9824018 type <integer_type 0x7fffe9822348 unsigned char > constant 255> + + and returns + + (const_int -1 [0xffffffffffffffff]) + + which doesn't work with the predicates nor the instruction templates + which expect the unsigned expanded value. Extract the unsigned char + and short integer constants to return + + (const_int 255 [0xff]) + + so that the expanded value is always unsigned, without the C frontend + promotion. */ + + if (TREE_CODE (arg) == INTEGER_CST) + { + tree type = TREE_TYPE (arg); + if (INTEGRAL_TYPE_P (type) + && TYPE_UNSIGNED (type) + && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node)) + { + HOST_WIDE_INT cst = TREE_INT_CST_LOW (arg); + return GEN_INT (cst); + } + } + + return expand_normal (arg); +} + /* Subroutine of ix86_expand_builtin to take care of insns with variable number of operands. 
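A worked illustration of the representation issue the new helper addresses (plain C, not the tree/RTL API): a small unsigned constant kept in sign-extended form reads as -1 until it is masked back to the precision of its type.

#include <stdint.h>

static int64_t
unsigned_small_int_view (int64_t sign_extended, unsigned int precision)
{
  /* PRECISION < 64 assumed; sign_extended == -1 with precision == 8
     yields 255, i.e. the (const_int 255) form the predicates expect.  */
  uint64_t mask = (UINT64_C (1) << precision) - 1;
  return (int64_t) ((uint64_t) sign_extended & mask);
}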
*/ @@ -12142,7 +12371,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, for (i = 0; i < nargs; i++) { tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); + rtx op = ix86_expand_unsigned_small_int_cst_argument (arg); machine_mode mode = insn_p->operand[i + 1].mode; /* Need to fixup modeless constant before testing predicate. */ op = fixup_modeless_constant (op, mode); @@ -12837,7 +13066,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, for (i = 0; i < nargs; i++) { tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); + rtx op = ix86_expand_unsigned_small_int_cst_argument (arg); machine_mode mode = insn_p->operand[i + 1].mode; bool match = insn_p->operand[i + 1].predicate (op, mode); @@ -13322,7 +13551,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, machine_mode mode = insn_p->operand[i + 1].mode; arg = CALL_EXPR_ARG (exp, i + arg_adjust); - op = expand_normal (arg); + op = ix86_expand_unsigned_small_int_cst_argument (arg); if (i == memory) { @@ -15466,7 +15695,7 @@ rdseed_step: op0 = expand_normal (arg0); op1 = expand_normal (arg1); op2 = expand_normal (arg2); - op3 = expand_normal (arg3); + op3 = ix86_expand_unsigned_small_int_cst_argument (arg3); op4 = expand_normal (arg4); /* Note the arg order is different from the operand order. */ mode0 = insn_data[icode].operand[1].mode; @@ -15681,7 +15910,7 @@ rdseed_step: arg3 = CALL_EXPR_ARG (exp, 3); arg4 = CALL_EXPR_ARG (exp, 4); op0 = expand_normal (arg0); - op1 = expand_normal (arg1); + op1 = ix86_expand_unsigned_small_int_cst_argument (arg1); op2 = expand_normal (arg2); op3 = expand_normal (arg3); op4 = expand_normal (arg4); @@ -16130,7 +16359,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, { case VEC_BCAST_PXOR: if ((mode == V8SImode && !TARGET_AVX2) - || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + || (mode == V16SImode && !TARGET_AVX512F)) return false; emit_move_insn (target, CONST0_RTX (mode)); return true; @@ -16138,7 +16367,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, case VEC_BCAST_PCMPEQ: if ((mode == V4SImode && !TARGET_SSE2) || (mode == V8SImode && !TARGET_AVX2) - || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + || (mode == V16SImode && !TARGET_AVX512F)) return false; emit_move_insn (target, CONSTM1_RTX (mode)); return true; @@ -16158,7 +16387,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V32QImode); emit_insn (gen_absv32qi2 (tmp2, tmp1)); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V64QImode); emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); @@ -16184,7 +16413,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V32QImode); emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1)); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V64QImode); emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); @@ -16210,7 +16439,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V16HImode); emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V32HImode); emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); @@ -16236,7 +16465,7 @@ 
ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg))); return true; } - else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512F) { tmp1 = gen_reg_rtx (V16SImode); emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); @@ -16262,7 +16491,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, tmp2 = gen_reg_rtx (V16HImode); emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); } - else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512BW) { tmp1 = gen_reg_rtx (V32HImode); emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); @@ -16288,7 +16517,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg))); return true; } - else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + else if (mode == V16SImode && TARGET_AVX512F) { tmp1 = gen_reg_rtx (V16SImode); emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); @@ -16342,8 +16571,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) if (GET_MODE (reg) != innermode) reg = gen_lowpart (innermode, reg); SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (seq) emit_insn_before (seq, insn); @@ -16659,7 +16887,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, case E_V32HFmode: case E_V32BFmode: - gcc_assert (TARGET_EVEX512); if (TARGET_AVX512BW) return ix86_vector_duplicate_value (mode, target, val); else @@ -16712,9 +16939,6 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, bool use_vector_set = false; rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; - if (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512) - return false; - switch (mode) { case E_V2DImode: @@ -18670,6 +18894,33 @@ emit_reduc_half (rtx dest, rtx src, int i) case E_V8HFmode: case E_V4SImode: case E_V2DImode: + if (TARGET_SSE_REDUCTION_PREFER_PSHUF) + { + if (i == 128) + { + d = gen_reg_rtx (V4SImode); + tem = gen_sse2_pshufd_1 ( + d, force_reg (V4SImode, gen_lowpart (V4SImode, src)), + GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3)); + break; + } + else if (i == 64) + { + d = gen_reg_rtx (V4SImode); + tem = gen_sse2_pshufd_1 ( + d, force_reg (V4SImode, gen_lowpart (V4SImode, src)), + GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1)); + break; + } + else if (i == 32) + { + d = gen_reg_rtx (V8HImode); + tem = gen_sse2_pshuflw_1 ( + d, force_reg (V8HImode, gen_lowpart (V8HImode, src)), + GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1)); + break; + } + } d = gen_reg_rtx (V1TImode); tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), GEN_INT (i / 2)); @@ -19256,8 +19507,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19519,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. 
*/ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a @@ -19356,7 +19627,7 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) unsigned vector_size = GET_MODE_SIZE (mode); if (TARGET_FMA - || (TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) + || (TARGET_AVX512F && vector_size == 64) || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, e0, x0, mthree))); @@ -22018,8 +22289,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) V4SImode this *will* succeed. For V8HImode or V16QImode it may not. 
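Both branches of the division expansion above compute the same Newton-Raphson refinement of the hardware reciprocal estimate, a/b ~= a * x0 * (2 - b * x0) with x0 = rcp(b); a scalar sketch (illustration only, fmaf standing in for the vector FMA patterns):

#include <math.h>

static float
swdiv_sketch (float a, float b)
{
  float x0 = 1.0f / b;           /* stands in for the rcpps estimate */
  float e0 = a * x0;             /* e0 = a * rcp(b) */
  float e1 = fmaf (e0, b, -a);   /* e1 = e0 * b - a */
  return fmaf (-e1, x0, e0);     /* e0 - e1 * x0 == a * x0 * (2 - b * x0) */
}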
*/ start_sequence (); ok = expand_vec_perm_1 (&dfinal); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22355,8 +22625,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); if (!ok) return false; @@ -22464,8 +22733,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22475,8 +22743,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22590,8 +22857,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22601,8 +22867,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn) { start_sequence (); ok = expand_vec_perm_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -22796,8 +23061,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dfirst); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dfirst); - seq1 = get_insns (); - end_sequence (); + seq1 = end_sequence (); if (!ok) return false; @@ -22805,8 +23069,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d) canonicalize_perm (&dsecond); start_sequence (); ok = ix86_expand_vec_perm_const_1 (&dsecond); - seq2 = get_insns (); - end_sequence (); + seq2 = end_sequence (); if (!ok) return false; @@ -24290,9 +24553,6 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, unsigned int i, nelt, which; bool two_args; - if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512) - return false; - /* For HF and BF mode vector, convert it to HI using subreg. */ if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode) { @@ -24834,7 +25094,6 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2) ix86_expand_vecop_qihi. */ if (!TARGET_AVX512BW || (qimode == V16QImode && !TARGET_AVX512VL) - || (qimode == V32QImode && !TARGET_EVEX512) /* There are no V64HImode instructions. 
*/ || qimode == V64QImode) return false; @@ -25303,7 +25562,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) machine_mode mode = GET_MODE (op0); rtx t1, t2, t3, t4, t5, t6; - if (TARGET_AVX512DQ && TARGET_EVEX512 && mode == V8DImode) + if (TARGET_AVX512DQ && mode == V8DImode) emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); @@ -26033,8 +26292,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, } } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); start_sequence (); @@ -26045,8 +26303,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq, end_sequence (); return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return res; } @@ -26089,8 +26346,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *prep_seq = get_insns (); - end_sequence (); + *prep_seq = end_sequence (); target = gen_rtx_REG (cc_mode, FLAGS_REG); dfv = ix86_get_flags_cc ((rtx_code) cmp_code); @@ -26121,8 +26377,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, return NULL_RTX; } - *gen_seq = get_insns (); - end_sequence (); + *gen_seq = end_sequence (); return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx); } @@ -26136,8 +26391,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x) { if (!TARGET_AVX512F || !CONST_VECTOR_P (x) - || (!TARGET_AVX512VL - && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512)) + || (!TARGET_AVX512VL && GET_MODE_SIZE (mode) != 64) || !VALID_BCST_MODE_P (GET_MODE_INNER (mode)) /* Disallow HFmode broadcast. */ || GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index c35ac24..054f8d5 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) insns_conv = BITMAP_ALLOC (NULL); queue = NULL; - n_sse_to_integer = 0; - n_integer_to_sse = 0; - + cost_sse_integer = 0; + weighted_cost_sse_integer = 0 ; max_visits = x86_stv_max_visits; } @@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def) /* Record the def/insn pair so we can later efficiently iterate over the defs to convert on insns not in the chain. */ bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); + basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + int cost = 0; + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def))) { if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def)) && !reg_new) return; - n_integer_to_sse++; + + /* Cost integer to sse moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpinsrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ + else + cost = COSTS_N_BYTES (12); } else { if (!reg_new) return; - n_sse_to_integer++; + + /* Cost sse to integer moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpextrd (6 bytes). 
*/ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ + else + cost = COSTS_N_BYTES (13); } + if (speed_p) + weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost; + + cost_sse_integer += cost; + if (dump_file) fprintf (dump_file, " Mark r%d def in insn %d as requiring both modes in chain #%d\n", @@ -518,26 +549,28 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed) instead of using a scalar one. */ int -general_scalar_chain::vector_const_cost (rtx exp) +general_scalar_chain::vector_const_cost (rtx exp, basic_block bb) { gcc_assert (CONST_INT_P (exp)); if (standard_sse_constant_p (exp, vmode)) return ix86_cost->sse_op; + if (optimize_bb_for_size_p (bb)) + return COSTS_N_BYTES (8); /* We have separate costs for SImode and DImode, use SImode costs for smaller modes. */ - return ix86_cost->sse_load[smode == DImode ? 1 : 0]; + return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2; } -/* Compute a gain for chain conversion. */ +/* Return true if it's cost profitable for chain conversion. */ -int +bool general_scalar_chain::compute_convert_gain () { bitmap_iterator bi; unsigned insn_uid; int gain = 0; - int cost = 0; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -547,7 +580,7 @@ general_scalar_chain::compute_convert_gain () smaller modes than SImode the int load/store costs need to be adjusted as well. */ unsigned sse_cost_idx = smode == DImode ? 1 : 0; - unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; + int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) { @@ -555,26 +588,58 @@ general_scalar_chain::compute_convert_gain () rtx def_set = single_set (insn); rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); + basic_block bb = BLOCK_FOR_INSN (insn); int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); if (REG_P (src) && REG_P (dst)) - igain += 2 * m - ix86_cost->xmm_move; + { + if (!speed_p) + /* reg-reg move is 2 bytes, while SSE 3. */ + igain += COSTS_N_BYTES (2 * m - 3); + else + /* Move costs are normalized to reg-reg move having cost 2. */ + igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2; + } else if (REG_P (src) && MEM_P (dst)) - igain - += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; + { + if (!speed_p) + /* Integer load/store is 3+ bytes and SSE 4+. */ + igain += COSTS_N_BYTES (3 * m - 4); + else + igain + += COSTS_N_INSNS (m * ix86_cost->int_store[2] + - ix86_cost->sse_store[sse_cost_idx]) / 2; + } else if (MEM_P (src) && REG_P (dst)) - igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx]; + { + if (!speed_p) + igain += COSTS_N_BYTES (3 * m - 4); + else + igain += COSTS_N_INSNS (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; + } else { /* For operations on memory operands, include the overhead of explicit load and store instructions. */ if (MEM_P (dst)) - igain += optimize_insn_for_size_p () - ? -COSTS_N_BYTES (8) - : (m * (ix86_cost->int_load[2] - + ix86_cost->int_store[2]) - - (ix86_cost->sse_load[sse_cost_idx] + - ix86_cost->sse_store[sse_cost_idx])); + { + if (!speed_p) + /* ??? This probably should account size difference + of SSE and integer load rather than full SSE load. 
*/ + igain -= COSTS_N_BYTES (8); + else + { + int cost = (m * (ix86_cost->int_load[2] + + ix86_cost->int_store[2]) + - (ix86_cost->sse_load[sse_cost_idx] + + ix86_cost->sse_store[sse_cost_idx])); + igain += COSTS_N_INSNS (cost) / 2; + } + } switch (GET_CODE (src)) { @@ -595,7 +660,7 @@ general_scalar_chain::compute_convert_gain () igain += ix86_cost->shift_const - ix86_cost->sse_op; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); break; case ROTATE: @@ -631,16 +696,17 @@ general_scalar_chain::compute_convert_gain () igain += m * ix86_cost->add; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); if (CONST_INT_P (XEXP (src, 1))) - igain -= vector_const_cost (XEXP (src, 1)); + igain -= vector_const_cost (XEXP (src, 1), bb); if (MEM_P (XEXP (src, 1))) { - if (optimize_insn_for_size_p ()) + if (!speed_p) igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); else - igain += m * ix86_cost->int_load[2] - - ix86_cost->sse_load[sse_cost_idx]; + igain += COSTS_N_INSNS + (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; } break; @@ -698,7 +764,7 @@ general_scalar_chain::compute_convert_gain () case CONST_INT: if (REG_P (dst)) { - if (optimize_insn_for_size_p ()) + if (!speed_p) { /* xor (2 bytes) vs. xorps (3 bytes). */ if (src == const0_rtx) @@ -722,14 +788,14 @@ general_scalar_chain::compute_convert_gain () /* DImode can be immediate for TARGET_64BIT and SImode always. */ igain += m * COSTS_N_INSNS (1); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } } else if (MEM_P (dst)) { igain += (m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } break; @@ -737,13 +803,14 @@ general_scalar_chain::compute_convert_gain () if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) { // movd (4 bytes) replaced with movdqa (4 bytes). - if (!optimize_insn_for_size_p ()) - igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move; + if (!!speed_p) + igain += COSTS_N_INSNS (ix86_cost->sse_to_integer + - ix86_cost->xmm_move) / 2; } else { // pshufd; movd replaced with pshufd. - if (optimize_insn_for_size_p ()) + if (!speed_p) igain += COSTS_N_BYTES (4); else igain += ix86_cost->sse_to_integer; @@ -755,55 +822,34 @@ general_scalar_chain::compute_convert_gain () } } + if (speed_p) + weighted_gain += bb_freq * igain; + gain += igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Instruction conversion gain: %d\n", gain); - - /* Cost the integer to sse and sse to integer moves. */ - if (!optimize_function_for_size_p (cfun)) - { - cost += n_sse_to_integer * ix86_cost->sse_to_integer; - /* ??? integer_to_sse but we only have that in the RA cost table. - Assume sse_to_integer/integer_to_sse are the same which they - are at the moment. */ - cost += n_integer_to_sse * ix86_cost->sse_to_integer; - } - else if (TARGET_64BIT || smode == SImode) - { - cost += n_sse_to_integer * COSTS_N_BYTES (4); - cost += n_integer_to_sse * COSTS_N_BYTES (4); - } - else if (TARGET_SSE4_1) - { - /* vmovd (4 bytes) + vpextrd (6 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (10); - /* vmovd (4 bytes) + vpinsrd (6 bytes). 
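A compact sketch of the profitability rule the rewritten gain computation implements (field and function names here are illustrative, not the scalar_chain interface): per-insn gains and the int<->SSE move costs are accumulated both raw and weighted by each block's profile frequency relative to the function entry, and the weighted totals decide unless they are equal.

#include <stdbool.h>

struct chain_cost
{
  double weighted_gain, weighted_cost;  /* sums of bb_freq * per-insn value */
  int gain, cost;                       /* unweighted insn/byte sums */
};

static bool
chain_profitable_p (const struct chain_cost *c)
{
  if (c->weighted_gain != c->weighted_cost)
    return c->weighted_gain > c->weighted_cost;
  return c->gain > c->cost;             /* tie-break on the raw totals */
}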
*/ - cost += n_integer_to_sse * COSTS_N_BYTES (10); - } - else { - /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (13); - /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (12); + fprintf (dump_file, " Instruction conversion gain: %d, \n", + gain); + fprintf (dump_file, " Registers conversion cost: %d\n", + cost_sse_integer); + fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n", + weighted_gain.to_double ()); + fprintf (dump_file, " Weighted registers conversion cost: %.2f\n", + weighted_cost_sse_integer.to_double ()); } - if (dump_file) - fprintf (dump_file, " Registers conversion cost: %d\n", cost); - - gain -= cost; - - if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); - - return gain; + if (weighted_gain != weighted_cost_sse_integer) + return weighted_gain > weighted_cost_sse_integer; + else + return gain > cost_sse_integer;; } /* Insert generated conversion instruction sequence INSNS @@ -902,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg) else emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0), gen_gpr_to_xmm_move_src (vmode, reg))); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -970,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src) else emit_move_insn (dst, src); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_conversion_insns (seq, insn); if (dump_file) @@ -1066,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn) { start_sequence (); vec_cst = validize_mem (force_const_mem (vmode, vec_cst)); - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); emit_insn_before (seq, insn); } @@ -1508,33 +1551,34 @@ general_scalar_chain::convert_insn (rtx_insn *insn) with numerous special cases. */ static int -timode_immed_const_gain (rtx cst) +timode_immed_const_gain (rtx cst, basic_block bb) { /* movabsq vs. movabsq+vmovq+vunpacklqdq. */ if (CONST_WIDE_INT_P (cst) && CONST_WIDE_INT_NUNITS (cst) == 2 && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1)) - return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9) + return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9) : -COSTS_N_INSNS (2); /* 2x movabsq ~ vmovdqa. */ return 0; } -/* Compute a gain for chain conversion. */ +/* Return true it's cost profitable for for chain conversion. */ -int +bool timode_scalar_chain::compute_convert_gain () { /* Assume that if we have to move TImode values between units, then transforming this chain isn't worth it. */ - if (n_sse_to_integer || n_integer_to_sse) - return -1; + if (cost_sse_integer) + return false; bitmap_iterator bi; unsigned insn_uid; /* Split ties to prefer V1TImode when not optimizing for size. */ int gain = optimize_size ? 
0 : 1; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -1546,34 +1590,36 @@ timode_scalar_chain::compute_convert_gain () rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); HOST_WIDE_INT op1val; + basic_block bb = BLOCK_FOR_INSN (insn); int scost, vcost; int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); switch (GET_CODE (src)) { case REG: - if (optimize_insn_for_size_p ()) + if (!speed_p) igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3); else igain = COSTS_N_INSNS (1); break; case MEM: - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1); break; case CONST_INT: if (MEM_P (dst) && standard_sse_constant_p (src, V1TImode)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1; + igain = !speed_p ? COSTS_N_BYTES (11) : 1; break; case CONST_WIDE_INT: /* 2 x mov vs. vmovdqa. */ if (MEM_P (dst)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); break; case NOT: @@ -1587,14 +1633,14 @@ timode_scalar_chain::compute_convert_gain () if (!MEM_P (dst)) igain = COSTS_N_INSNS (1); if (CONST_SCALAR_INT_P (XEXP (src, 1))) - igain += timode_immed_const_gain (XEXP (src, 1)); + igain += timode_immed_const_gain (XEXP (src, 1), bb); break; case ASHIFT: case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { if (op1val == 64 || op1val == 65) scost = COSTS_N_BYTES (5); @@ -1628,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { if (op1val == 64 || op1val == 127) scost = COSTS_N_BYTES (7); @@ -1706,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATERT: /* See ix86_expand_v1ti_rotate. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (!speed_p) { scost = COSTS_N_BYTES (13); if ((op1val & 31) == 0) @@ -1738,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain () { if (GET_CODE (XEXP (src, 0)) == AND) /* and;and;or (9 bytes) vs. ptest (5 bytes). */ - igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (2); + igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2); /* or (3 bytes) vs. ptest (5 bytes). */ - else if (optimize_insn_for_size_p ()) + else if (!speed_p) igain = -COSTS_N_BYTES (2); } else if (XEXP (src, 1) == const1_rtx) /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ - igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6) - : -COSTS_N_INSNS (1); + igain = !speed_p ? 
-COSTS_N_BYTES (6) : -COSTS_N_INSNS (1); break; default: break; } + gain += igain; + if (speed_p) + weighted_gain += bb_freq * igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); + fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n", + gain, weighted_gain.to_double ()); - return gain; + if (weighted_gain > (sreal) 0) + return true; + else + return gain > 0; } /* Fix uses of converted REG in debug insns. */ @@ -1874,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) src = validize_mem (force_const_mem (V1TImode, src)); use_move = MEM_P (dst); } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (seq) emit_insn_before (seq, insn); if (use_move) @@ -2561,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p) conversions. */ if (chain->build (&candidates[i], uid, disallowed)) { - if (chain->compute_convert_gain () > 0) + if (chain->compute_convert_gain ()) converted_insns += chain->convert (); else if (dump_file) fprintf (dump_file, "Chain #%d conversion is not profitable\n", @@ -3034,6 +3085,82 @@ ix86_rpad_gate () && optimize_function_for_speed_p (cfun)); } +/* Generate a vector set, DEST = SRC, at entry of the nearest dominator + for basic block map BBS, which is in the fake loop that contains the + whole function, so that there is only a single vector set in the + whole function. If not nullptr, INNER_SCALAR is the inner scalar of + SRC, as (reg:SI 99) in (vec_duplicate:V4SI (reg:SI 99)). */ + +static void +ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, + rtx inner_scalar = nullptr) +{ + basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); + while (bb->loop_father->latch + != EXIT_BLOCK_PTR_FOR_FN (cfun)) + bb = get_immediate_dominator (CDI_DOMINATORS, + bb->loop_father->header); + + rtx set = gen_rtx_SET (dest, src); + + rtx_insn *insn = BB_HEAD (bb); + while (insn && !NONDEBUG_INSN_P (insn)) + { + if (insn == BB_END (bb)) + { + insn = NULL; + break; + } + insn = NEXT_INSN (insn); + } + + rtx_insn *set_insn; + if (insn == BB_HEAD (bb)) + { + set_insn = emit_insn_before (set, insn); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + } + else + { + rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb); + set_insn = emit_insn_after (set, after); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, after); + fprintf (dump_file, "\n"); + } + } + + if (inner_scalar) + { + /* Set the source in (vec_duplicate:V4SI (reg:SI 99)). 
*/ + rtx reg = XEXP (src, 0); + if ((REG_P (inner_scalar) || MEM_P (inner_scalar)) + && GET_MODE (reg) != GET_MODE (inner_scalar)) + inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0); + rtx set = gen_rtx_SET (reg, inner_scalar); + insn = emit_insn_before (set, set_insn); + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\n"); + } + } +} + /* At entry of the nearest common dominator for basic blocks with conversions/rcp/sqrt/rsqrt/round, generate a single vxorps %xmmN, %xmmN, %xmmN @@ -3155,7 +3282,6 @@ remove_partial_avx_dependency (void) /* Generate an XMM vector SET. */ set = gen_rtx_SET (vec, src); set_insn = emit_insn_before (set, insn); - df_insn_rescan (set_insn); if (cfun->can_throw_non_call_exceptions) { @@ -3188,35 +3314,10 @@ remove_partial_avx_dependency (void) calculate_dominance_info (CDI_DOMINATORS); loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - /* Generate a vxorps at entry of the nearest dominator for basic - blocks with conversions, which is in the fake loop that - contains the whole function, so that there is only a single - vxorps in the whole function. */ - bb = nearest_common_dominator_for_set (CDI_DOMINATORS, - convert_bbs); - while (bb->loop_father->latch - != EXIT_BLOCK_PTR_FOR_FN (cfun)) - bb = get_immediate_dominator (CDI_DOMINATORS, - bb->loop_father->header); - - set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); - - insn = BB_HEAD (bb); - while (insn && !NONDEBUG_INSN_P (insn)) - { - if (insn == BB_END (bb)) - { - insn = NULL; - break; - } - insn = NEXT_INSN (insn); - } - if (insn == BB_HEAD (bb)) - set_insn = emit_insn_before (set, insn); - else - set_insn = emit_insn_after (set, - insn ? PREV_INSN (insn) : BB_END (bb)); - df_insn_rescan (set_insn); + ix86_place_single_vector_set (v4sf_const0, + CONST0_RTX (V4SFmode), + convert_bbs); + loop_optimizer_finalize (); if (!control_flow_insns.is_empty ()) @@ -3288,6 +3389,568 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) return new pass_remove_partial_avx_dependency (ctxt); } +/* Return a machine mode suitable for vector SIZE with SMODE inner + mode. */ + +static machine_mode +ix86_get_vector_cse_mode (unsigned int size, machine_mode smode) +{ + /* Use the inner scalar mode of vector broadcast source in: + + (set (reg:V8DF 394) + (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ]))) + + to compute the vector mode for broadcast from vector source. + */ + if (VECTOR_MODE_P (smode)) + smode = GET_MODE_INNER (smode); + scalar_mode s_mode = as_a <scalar_mode> (smode); + poly_uint64 nunits = size / GET_MODE_SIZE (smode); + machine_mode mode = mode_for_vector (s_mode, nunits).require (); + return mode; +} + +/* Replace the source operand of instructions in VECTOR_INSNS with + VECTOR_CONST in VECTOR_MODE. */ + +static void +replace_vector_const (machine_mode vector_mode, rtx vector_const, + auto_bitmap &vector_insns, + machine_mode scalar_mode) +{ + bitmap_iterator bi; + unsigned int id; + + EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (id)->insn; + + /* Get the single SET instruction. */ + rtx set = single_set (insn); + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + machine_mode mode = GET_MODE (dest); + + rtx replace; + /* Replace the source operand with VECTOR_CONST. 
*/ + if (SUBREG_P (src) || mode == vector_mode) + replace = vector_const; + else + { + unsigned int size = GET_MODE_SIZE (mode); + if (size < ix86_regmode_natural_size (mode)) + { + /* If the mode size is smaller than its natural size, + first insert an extra move with a QI vector SUBREG + of the same size to avoid validate_subreg failure. */ + machine_mode vmode + = ix86_get_vector_cse_mode (size, scalar_mode); + rtx vreg; + if (mode == vmode) + vreg = vector_const; + else + { + vreg = gen_reg_rtx (vmode); + rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); + rtx pat = gen_rtx_SET (vreg, vsubreg); + rtx_insn *vinsn = emit_insn_before (pat, insn); + if (dump_file) + { + fprintf (dump_file, "\nInsert an extra move:\n\n"); + print_rtl_single (dump_file, vinsn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + } + replace = gen_rtx_SUBREG (mode, vreg, 0); + } + else + replace = gen_rtx_SUBREG (mode, vector_const, 0); + } + + if (dump_file) + { + fprintf (dump_file, "\nReplace:\n\n"); + print_rtl_single (dump_file, insn); + } + SET_SRC (set) = replace; + /* Drop possible dead definitions. */ + PATTERN (insn) = set; + INSN_CODE (insn) = -1; + recog_memoized (insn); + if (dump_file) + { + fprintf (dump_file, "\nwith:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } + df_insn_rescan (insn); + } +} + +enum x86_cse_kind +{ + X86_CSE_CONST0_VECTOR, + X86_CSE_CONSTM1_VECTOR, + X86_CSE_VEC_DUP +}; + +struct redundant_load +{ + /* Bitmap of basic blocks with broadcast instructions. */ + auto_bitmap bbs; + /* Bitmap of broadcast instructions. */ + auto_bitmap insns; + /* The broadcast inner scalar. */ + rtx val; + /* The inner scalar mode. */ + machine_mode mode; + /* The instruction which sets the inner scalar. Nullptr if the inner + scalar is applied to the whole function, instead of within the same + block. */ + rtx_insn *def_insn; + /* The widest broadcast source. */ + rtx broadcast_source; + /* The widest broadcast register. */ + rtx broadcast_reg; + /* The basic block of the broadcast instruction. */ + basic_block bb; + /* The number of broadcast instructions with the same inner scalar. */ + unsigned HOST_WIDE_INT count; + /* The threshold of broadcast instructions with the same inner + scalar. */ + unsigned int threshold; + /* The widest broadcast size in bytes. */ + unsigned int size; + /* Load kind. */ + x86_cse_kind kind; +}; + +/* Return the inner scalar if OP is a broadcast, else return nullptr. */ + +static rtx +ix86_broadcast_inner (rtx op, machine_mode mode, + machine_mode *scalar_mode_p, + x86_cse_kind *kind_p, rtx_insn **insn_p) +{ + if (op == const0_rtx || op == CONST0_RTX (mode)) + { + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONST0_VECTOR; + *insn_p = nullptr; + return const0_rtx; + } + else if ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && (op == constm1_rtx || op == CONSTM1_RTX (mode))) + || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && float_vector_all_ones_operand (op, mode))) + { + *scalar_mode_p = QImode; + *kind_p = X86_CSE_CONSTM1_VECTOR; + *insn_p = nullptr; + return constm1_rtx; + } + + mode = GET_MODE (op); + int nunits = GET_MODE_NUNITS (mode); + if (nunits < 2) + return nullptr; + + *kind_p = X86_CSE_VEC_DUP; + + rtx reg; + if (GET_CODE (op) == VEC_DUPLICATE) + { + /* Only + (vec_duplicate:V4SI (reg:SI 99)) + (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64])) + are supported. Set OP to the broadcast source by default. 
*/ + op = XEXP (op, 0); + reg = op; + if (SUBREG_P (op) + && SUBREG_BYTE (op) == 0 + && !paradoxical_subreg_p (op)) + reg = SUBREG_REG (op); + if (!REG_P (reg)) + { + if (MEM_P (op) + && SYMBOL_REF_P (XEXP (op, 0)) + && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) + { + /* Handle constant broadcast from memory. */ + *scalar_mode_p = GET_MODE_INNER (mode); + *insn_p = nullptr; + return op; + } + return nullptr; + } + } + else if (CONST_VECTOR_P (op)) + { + rtx first = XVECEXP (op, 0, 0); + for (int i = 1; i < nunits; ++i) + { + rtx tmp = XVECEXP (op, 0, i); + /* Vector duplicate value. */ + if (!rtx_equal_p (tmp, first)) + return nullptr; + } + *scalar_mode_p = GET_MODE (first); + *insn_p = nullptr; + return first; + } + else + return nullptr; + + mode = GET_MODE (op); + + /* Only single def chain is supported. */ + df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg)); + if (!ref + || DF_REF_IS_ARTIFICIAL (ref) + || DF_REF_NEXT_REG (ref) != nullptr) + return nullptr; + + rtx_insn *insn = DF_REF_INSN (ref); + rtx set = single_set (insn); + if (!set) + return nullptr; + + rtx src = SET_SRC (set); + + if (CONST_INT_P (src)) + { + /* Handle sequences like + + (set (reg:SI 99) + (const_int 34 [0x22])) + (set (reg:V4SI 98) + (vec_duplicate:V4SI (reg:SI 99))) + + Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an + integer constant. */ + op = src; + *insn_p = nullptr; + } + else + { + /* Handle sequences like + + (set (reg:QI 105 [ c ]) + (reg:QI 5 di [ c ])) + (set (reg:V64QI 102 [ _1 ]) + (vec_duplicate:V64QI (reg:QI 105 [ c ]))) + + (set (reg/v:SI 116 [ argc ]) + (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32])) + (set (reg:V4SI 119 [ _45 ]) + (vec_duplicate:V4SI (reg/v:SI 116 [ argc ]))) + + (set (reg:SI 98 [ _1 ]) + (sign_extend:SI (reg:QI 106 [ c ]))) + (set (reg:V16SI 103 [ _2 ]) + (vec_duplicate:V16SI (reg:SI 98 [ _1 ]))) + + (set (reg:SI 102 [ cost ]) + (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40]))) + (set (reg:V4HI 103 [ _16 ]) + (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0))) + + (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0) + (ashift:SI (reg:SI 158) + (subreg:QI (reg:SI 156 [ _2 ]) 0))) + (set (reg:V16HI 183 [ _61 ]) + (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ]))) + + Set *INSN_P to INSN and return the broadcast source otherwise. */ + *insn_p = insn; + } + + *scalar_mode_p = mode; + return op; +} + +/* At entry of the nearest common dominator for basic blocks with vector + CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest + vector set instruction for all CONST0_RTX and integer CONSTM1_RTX + uses. + + NB: We want to generate only a single widest vector set to cover the + whole function. The LCM algorithm isn't appropriate here since it + may place a vector set inside the loop. */ + +static unsigned int +remove_redundant_vector_load (void) +{ + timevar_push (TV_MACH_DEP); + + auto_vec<redundant_load *> loads; + redundant_load *load; + basic_block bb; + rtx_insn *insn; + unsigned int i; + + df_set_flags (DF_DEFER_INSN_RESCAN); + + bool recursive_call_p = cfun->machine->recursive_function; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (!set) + continue; + + /* Record single set vector instruction with CONST0_RTX and + CONSTM1_RTX source. Record basic blocks with CONST0_RTX and + CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the + maximum size of CONST0_RTX and CONSTM1_RTX. 
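At the source level, the redundancy the new rrvl pass eliminates looks roughly like the following (illustrative AVX2 example; the pass itself matches the RTL broadcast patterns listed above):

#include <immintrin.h>

__m256i
pick (__m256i a, __m256i b, int x, int which)
{
  if (which)
    return _mm256_add_epi32 (a, _mm256_set1_epi32 (x));  /* broadcast of x */
  return _mm256_sub_epi32 (b, _mm256_set1_epi32 (x));    /* same broadcast */
}

/* Once two or more matching broadcasts of the same scalar are found, a
   single widest broadcast register is defined once - at the nearest
   common dominator of the use blocks, or right after the scalar's own
   definition - and every use is rewritten to (a subreg of) it.  */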
*/ + + rtx dest = SET_DEST (set); + machine_mode mode = GET_MODE (dest); + /* Skip non-vector instruction. */ + if (!VECTOR_MODE_P (mode)) + continue; + + rtx src = SET_SRC (set); + /* Skip non-vector load instruction. */ + if (!REG_P (dest) && !SUBREG_P (dest)) + continue; + + rtx_insn *def_insn; + machine_mode scalar_mode; + x86_cse_kind kind; + rtx val = ix86_broadcast_inner (src, mode, &scalar_mode, + &kind, &def_insn); + if (!val) + continue; + + /* Remove redundant register loads if there are more than 2 + loads will be used. */ + unsigned int threshold = 2; + + /* Check if there is a matching redundant vector load. */ + bool matched = false; + FOR_EACH_VEC_ELT (loads, i, load) + if (load->val + && load->kind == kind + && load->mode == scalar_mode + && (load->bb == bb + || kind < X86_CSE_VEC_DUP + /* Non all 0s/1s vector load must be in the same + basic block if it is in a recursive call. */ + || !recursive_call_p) + && rtx_equal_p (load->val, val)) + { + /* Record vector instruction. */ + bitmap_set_bit (load->insns, INSN_UID (insn)); + + /* Record the maximum vector size. */ + if (load->size < GET_MODE_SIZE (mode)) + load->size = GET_MODE_SIZE (mode); + + /* Record the basic block. */ + bitmap_set_bit (load->bbs, bb->index); + load->count++; + matched = true; + break; + } + + if (matched) + continue; + + /* We see this vector broadcast the first time. */ + load = new redundant_load; + + load->val = copy_rtx (val); + load->mode = scalar_mode; + load->size = GET_MODE_SIZE (mode); + load->def_insn = def_insn; + load->count = 1; + load->threshold = threshold; + load->bb = BLOCK_FOR_INSN (insn); + load->kind = kind; + + bitmap_set_bit (load->insns, INSN_UID (insn)); + bitmap_set_bit (load->bbs, bb->index); + + loads.safe_push (load); + } + } + + bool replaced = false; + rtx reg, broadcast_source, broadcast_reg; + FOR_EACH_VEC_ELT (loads, i, load) + if (load->count >= load->threshold) + { + machine_mode mode = ix86_get_vector_cse_mode (load->size, + load->mode); + broadcast_reg = gen_reg_rtx (mode); + if (load->def_insn) + { + /* Replace redundant vector loads with a single vector load + in the same basic block. */ + reg = load->val; + if (load->mode != GET_MODE (reg)) + reg = gen_rtx_SUBREG (load->mode, reg, 0); + broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); + replace_vector_const (mode, broadcast_reg, load->insns, + load->mode); + } + else + { + /* This is a constant integer/double vector. If the + inner scalar is 0 or -1, set vector to CONST0_RTX + or CONSTM1_RTX directly. */ + rtx reg; + switch (load->kind) + { + case X86_CSE_CONST0_VECTOR: + broadcast_source = CONST0_RTX (mode); + break; + case X86_CSE_CONSTM1_VECTOR: + broadcast_source = CONSTM1_RTX (mode); + break; + default: + reg = gen_reg_rtx (load->mode); + broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); + break; + } + replace_vector_const (mode, broadcast_reg, load->insns, + load->mode); + } + load->broadcast_source = broadcast_source; + load->broadcast_reg = broadcast_reg; + replaced = true; + } + + if (replaced) + { + auto_vec<rtx_insn *> control_flow_insns; + + /* (Re-)discover loops so that bb->loop_father can be used in the + analysis below. */ + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + FOR_EACH_VEC_ELT (loads, i, load) + if (load->count >= load->threshold) + { + if (load->def_insn) + { + /* Insert a broadcast after the original scalar + definition. 
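Illustration only (register numbers are hypothetical): after this step the stream contains the original scalar definition immediately followed by the widest broadcast, e.g.

   (set (reg:SI 116) (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
   (set (reg:V16SI 200) (vec_duplicate:V16SI (reg:SI 116)))

and each redundant broadcast recorded earlier is then rewritten to use reg 200, or a subreg of it for narrower vector modes.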
*/ + rtx set = gen_rtx_SET (load->broadcast_reg, + load->broadcast_source); + insn = emit_insn_after (set, load->def_insn); + + if (cfun->can_throw_non_call_exceptions) + { + /* Handle REG_EH_REGION note in DEF_INSN. */ + rtx note = find_reg_note (load->def_insn, + REG_EH_REGION, nullptr); + if (note) + { + control_flow_insns.safe_push (load->def_insn); + add_reg_note (insn, REG_EH_REGION, + XEXP (note, 0)); + } + } + + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, load->def_insn); + fprintf (dump_file, "\n"); + } + } + else + ix86_place_single_vector_set (load->broadcast_reg, + load->broadcast_source, + load->bbs, + (load->kind == X86_CSE_VEC_DUP + ? load->val + : nullptr)); + } + + loop_optimizer_finalize (); + + if (!control_flow_insns.is_empty ()) + { + free_dominance_info (CDI_DOMINATORS); + + FOR_EACH_VEC_ELT (control_flow_insns, i, insn) + if (control_flow_insn_p (insn)) + { + /* Split the block after insn. There will be a fallthru + edge, which is OK so we keep it. We have to create + the exception edges ourselves. */ + bb = BLOCK_FOR_INSN (insn); + split_block (bb, insn); + rtl_make_eh_edge (NULL, bb, BB_END (bb)); + } + } + + df_process_deferred_rescans (); + } + + df_clear_flags (DF_DEFER_INSN_RESCAN); + + timevar_pop (TV_MACH_DEP); + return 0; +} + +namespace { + +const pass_data pass_data_remove_redundant_vector_load = +{ + RTL_PASS, /* type */ + "rrvl", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_remove_redundant_vector_load : public rtl_opt_pass +{ +public: + pass_remove_redundant_vector_load (gcc::context *ctxt) + : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *fun) final override + { + return (TARGET_SSE2 + && optimize + && optimize_function_for_speed_p (fun)); + } + + unsigned int execute (function *) final override + { + return remove_redundant_vector_load (); + } +}; // class pass_remove_redundant_vector_load + +} // anon namespace + +rtl_opt_pass * +make_pass_remove_redundant_vector_load (gcc::context *ctxt) +{ + return new pass_remove_redundant_vector_load (ctxt); +} + /* Convert legacy instructions that clobbers EFLAGS to APX_NF instructions when there are no flag set between a flag producer and user. */ @@ -3962,7 +4625,6 @@ ix86_get_function_versions_dispatcher (void *decl) struct cgraph_node *node = NULL; struct cgraph_node *default_node = NULL; struct cgraph_function_version_info *node_v = NULL; - struct cgraph_function_version_info *first_v = NULL; tree dispatch_decl = NULL; @@ -3979,37 +4641,16 @@ ix86_get_function_versions_dispatcher (void *decl) if (node_v->dispatcher_resolver != NULL) return node_v->dispatcher_resolver; - /* Find the default version and make it the first node. */ - first_v = node_v; - /* Go to the beginning of the chain. */ - while (first_v->prev != NULL) - first_v = first_v->prev; - default_version_info = first_v; - while (default_version_info != NULL) - { - if (is_function_default_version - (default_version_info->this_node->decl)) - break; - default_version_info = default_version_info->next; - } + /* The default node is always the beginning of the chain. 
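Illustration only: a hypothetical multi-versioned function.  Since the default version is now guaranteed to head the version chain, the dispatcher code that follows no longer has to search for it or reorder the chain.

// Hypothetical example; the version names are illustrative.
__attribute__ ((target_clones ("default", "avx2", "arch=skylake")))
int
compute (int x)
{
  return x * 2;
}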
*/ + default_version_info = node_v; + while (default_version_info->prev != NULL) + default_version_info = default_version_info->prev; + default_node = default_version_info->this_node; /* If there is no default node, just return NULL. */ - if (default_version_info == NULL) + if (!is_function_default_version (default_node->decl)) return NULL; - /* Make default info the first node. */ - if (first_v != default_version_info) - { - default_version_info->prev->next = default_version_info->next; - if (default_version_info->next) - default_version_info->next->prev = default_version_info->prev; - first_v->prev = default_version_info; - default_version_info->next = first_v; - default_version_info->prev = NULL; - } - - default_node = default_version_info->this_node; - #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) if (targetm.has_ifunc_p ()) { diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h index 24b0c4e..e3719b3 100644 --- a/gcc/config/i386/i386-features.h +++ b/gcc/config/i386/i386-features.h @@ -153,12 +153,13 @@ class scalar_chain bitmap insns_conv; hash_map<rtx, rtx> defs_map; - unsigned n_sse_to_integer; - unsigned n_integer_to_sse; + /* Cost of inserted conversion between ineteger and sse. */ + int cost_sse_integer; + sreal weighted_cost_sse_integer; auto_vec<rtx_insn *> control_flow_insns; bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); - virtual int compute_convert_gain () = 0; + virtual bool compute_convert_gain () = 0; int convert (); protected: @@ -184,11 +185,11 @@ class general_scalar_chain : public scalar_chain public: general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) : scalar_chain (smode_, vmode_) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void convert_insn (rtx_insn *insn) final override; - int vector_const_cost (rtx exp); + int vector_const_cost (rtx exp, basic_block bb); rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn); }; @@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain { public: timode_scalar_chain () : scalar_chain (TImode, V1TImode) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void fix_debug_reg_uses (rtx reg); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 19d78d7..6fa601d 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -118,8 +118,6 @@ DEF_PTA(SHA512) DEF_PTA(SM4) DEF_PTA(APX_F) DEF_PTA(USER_MSR) -DEF_PTA(EVEX512) -DEF_PTA(AVX10_1_256) DEF_PTA(AVX10_1) DEF_PTA(AVX10_2) DEF_PTA(AMX_AVX512) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index a9fac01..09cb133 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -259,9 +259,7 @@ static struct ix86_target_opts isa2_opts[] = { "-msm3", OPTION_MASK_ISA2_SM3 }, { "-msha512", OPTION_MASK_ISA2_SHA512 }, { "-msm4", OPTION_MASK_ISA2_SM4 }, - { "-mevex512", OPTION_MASK_ISA2_EVEX512 }, { "-musermsr", OPTION_MASK_ISA2_USER_MSR }, - { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 }, { "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 }, { "-mavx10.2", OPTION_MASK_ISA2_AVX10_2 }, { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 }, @@ -713,8 +711,6 @@ ix86_function_specific_save (struct cl_target_option *ptr, ptr->x_ix86_apx_features = opts->x_ix86_apx_features; ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; - 
ptr->x_ix86_no_avx512_explicit = opts->x_ix86_no_avx512_explicit; - ptr->x_ix86_no_avx10_1_explicit = opts->x_ix86_no_avx10_1_explicit; ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; ptr->x_ix86_arch_string = opts->x_ix86_arch_string; ptr->x_ix86_tune_string = opts->x_ix86_tune_string; @@ -764,63 +760,63 @@ static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = { /* This table must be in sync with enum processor_type in i386.h. */ static const struct processor_costs *processor_cost_table[] = { - &generic_cost, - &i386_cost, - &i486_cost, - &pentium_cost, - &lakemont_cost, - &pentiumpro_cost, - &pentium4_cost, - &nocona_cost, - &core_cost, - &core_cost, - &core_cost, - &core_cost, - &atom_cost, - &slm_cost, - &slm_cost, - &slm_cost, - &tremont_cost, - &alderlake_cost, - &alderlake_cost, - &alderlake_cost, - &skylake_cost, - &skylake_cost, - &icelake_cost, - &icelake_cost, - &icelake_cost, - &skylake_cost, - &icelake_cost, - &skylake_cost, - &icelake_cost, - &alderlake_cost, - &icelake_cost, - &icelake_cost, - &icelake_cost, - &alderlake_cost, - &alderlake_cost, - &alderlake_cost, - &icelake_cost, - &intel_cost, - &lujiazui_cost, - &yongfeng_cost, - &shijidadao_cost, - &geode_cost, - &k6_cost, - &athlon_cost, - &k8_cost, - &amdfam10_cost, - &bdver_cost, - &bdver_cost, - &bdver_cost, - &bdver_cost, - &btver1_cost, - &btver2_cost, - &znver1_cost, - &znver2_cost, - &znver3_cost, - &znver4_cost, - &znver5_cost + &generic_cost, /* PROCESSOR_GENERIC. */ + &i386_cost, /* PROCESSOR_I386. */ + &i486_cost, /* PROCESSOR_I486. */ + &pentium_cost, /* PROCESSOR_PENTIUM. */ + &lakemont_cost, /* PROCESSOR_LAKEMONT. */ + &pentiumpro_cost, /* PROCESSOR_PENTIUMPRO. */ + &pentium4_cost, /* PROCESSOR_PENTIUM4. */ + &nocona_cost, /* PROCESSOR_NOCONA. */ + &core_cost, /* PROCESSOR_CORE2. */ + &core_cost, /* PROCESSOR_NEHALEM. */ + &core_cost, /* PROCESSOR_SANDYBRIDGE. */ + &core_cost, /* PROCESSOR_HASWELL. */ + &atom_cost, /* PROCESSOR_BONNELL. */ + &slm_cost, /* PROCESSOR_SILVERMONT. */ + &slm_cost, /* PROCESSOR_GOLDMONT. */ + &slm_cost, /* PROCESSOR_GOLDMONT_PLUS. */ + &tremont_cost, /* PROCESSOR_TREMONT. */ + &alderlake_cost, /* PROCESSOR_SIERRAFOREST. */ + &alderlake_cost, /* PROCESSOR_GRANDRIDGE. */ + &alderlake_cost, /* PROCESSOR_CLEARWATERFOREST. */ + &skylake_cost, /* PROCESSOR_SKYLAKE. */ + &skylake_cost, /* PROCESSOR_SKYLAKE_AVX512. */ + &icelake_cost, /* PROCESSOR_CANNONLAKE. */ + &icelake_cost, /* PROCESSOR_ICELAKE_CLIENT. */ + &icelake_cost, /* PROCESSOR_ICELAKE_SERVER. */ + &skylake_cost, /* PROCESSOR_CASCADELAKE. */ + &icelake_cost, /* PROCESSOR_TIGERLAKE. */ + &skylake_cost, /* PROCESSOR_COOPERLAKE. */ + &icelake_cost, /* PROCESSOR_SAPPHIRERAPIDS. */ + &alderlake_cost, /* PROCESSOR_ALDERLAKE. */ + &icelake_cost, /* PROCESSOR_ROCKETLAKE. */ + &icelake_cost, /* PROCESSOR_GRANITERAPIDS. */ + &icelake_cost, /* PROCESSOR_GRANITERAPIDS_D. */ + &alderlake_cost, /* PROCESSOR_ARROWLAKE. */ + &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */ + &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */ + &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */ + &alderlake_cost, /* PROCESSOR_INTEL. */ + &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */ + &yongfeng_cost, /* PROCESSOR_YONGFENG. */ + &shijidadao_cost, /* PROCESSOR_SHIJIDADAO. */ + &geode_cost, /* PROCESSOR_GEODE. */ + &k6_cost, /* PROCESSOR_K6. */ + &athlon_cost, /* PROCESSOR_ATHLON. */ + &k8_cost, /* PROCESSOR_K8. */ + &amdfam10_cost, /* PROCESSOR_AMDFAM10. */ + &bdver_cost, /* PROCESSOR_BDVER1. */ + &bdver_cost, /* PROCESSOR_BDVER2. 
*/ + &bdver_cost, /* PROCESSOR_BDVER3. */ + &bdver_cost, /* PROCESSOR_BDVER4. */ + &btver1_cost, /* PROCESSOR_BTVER1. */ + &btver2_cost, /* PROCESSOR_BTVER2. */ + &znver1_cost, /* PROCESSOR_ZNVER1. */ + &znver2_cost, /* PROCESSOR_ZNVER2. */ + &znver3_cost, /* PROCESSOR_ZNVER3. */ + &znver4_cost, /* PROCESSOR_ZNVER4. */ + &znver5_cost /* PROCESSOR_ZNVER5. */ }; /* Guarantee that the array is aligned with enum processor_type. */ @@ -858,8 +854,6 @@ ix86_function_specific_restore (struct gcc_options *opts, opts->x_ix86_apx_features = ptr->x_ix86_apx_features; opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; - opts->x_ix86_no_avx512_explicit = ptr->x_ix86_no_avx512_explicit; - opts->x_ix86_no_avx10_1_explicit = ptr->x_ix86_no_avx10_1_explicit; opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; opts->x_ix86_arch_string = ptr->x_ix86_arch_string; opts->x_ix86_tune_string = ptr->x_ix86_tune_string; @@ -1131,11 +1125,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("sha512", OPT_msha512), IX86_ATTR_ISA ("sm4", OPT_msm4), IX86_ATTR_ISA ("apxf", OPT_mapxf), - IX86_ATTR_ISA ("evex512", OPT_mevex512), IX86_ATTR_ISA ("usermsr", OPT_musermsr), - IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1), - IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1), IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2), IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512), IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32), @@ -1271,13 +1262,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], } } - /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */ - if (opt == OPT_msse4 && !opt_set_p) - { - opt = OPT_mno_sse4; - opt_set_p = true; - } - /* Process the option. */ if (opt == N_OPTS) { @@ -1436,18 +1420,6 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, target_clone_attr)) return error_mark_node; - /* AVX10.1-256 will enable only 256 bit AVX512F features by setting all - AVX512 related ISA flags and not setting EVEX512. When it is used - with avx512 related function attribute, we need to enable 512 bit to - align with the command line behavior. Manually set EVEX512 for this - scenario. */ - if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256) - && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F) - && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) - && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512) - && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)) - opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512; - /* If the changed options are different from the default, rerun ix86_option_override_internal, and then save the options away. The string options are attribute options, and will be undone @@ -1458,10 +1430,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args, || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] || enum_opts_set.x_ix86_fpmath - || enum_opts_set.x_prefer_vector_width_type - || (!(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_1_256) - && (opts->x_ix86_isa_flags2_explicit - & OPTION_MASK_ISA2_AVX10_1_256))) + || enum_opts_set.x_prefer_vector_width_type) { /* If we are using the default tune= or arch=, undo the string assigned, and use the default. 
*/ @@ -2025,7 +1994,7 @@ ix86_option_override_internal (bool main_args_p, struct gcc_options *opts_set) { unsigned int i; - unsigned HOST_WIDE_INT ix86_arch_mask, avx512_isa_flags, avx512_isa_flags2; + unsigned HOST_WIDE_INT ix86_arch_mask; const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); /* -mrecip options. */ @@ -2044,15 +2013,6 @@ ix86_option_override_internal (bool main_args_p, { "vec-sqrt", RECIP_MASK_VEC_SQRT }, }; - avx512_isa_flags = OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD - | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW - | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA - | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2 - | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ - | OPTION_MASK_ISA_AVX512BITALG; - avx512_isa_flags2 = OPTION_MASK_ISA2_AVX512FP16 - | OPTION_MASK_ISA2_AVX512BF16; - /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) @@ -2674,107 +2634,6 @@ ix86_option_override_internal (bool main_args_p, &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) & ~opts->x_ix86_isa_flags_explicit); - /* Emit a warning if AVX10.1 options is used with AVX512/EVEX512 options except - for the following option combinations: - 1. Both AVX10.1-512 and AVX512 with 512 bit vector width are enabled with no - explicit disable on other AVX512 features. - 2. Both AVX10.1-256 and AVX512 w/o 512 bit vector width are enabled with no - explicit disable on other AVX512 features. - 3. Both AVX10.1 and AVX512 are disabled. */ - if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)) - { - if (opts->x_ix86_no_avx512_explicit - && (((~(avx512_isa_flags & opts->x_ix86_isa_flags) - & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit))) - || ((~((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512) - & opts->x_ix86_isa_flags2) - & ((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512) - & opts->x_ix86_isa_flags2_explicit))))) - warning (0, "%<-mno-evex512%> or %<-mno-avx512XXX%> cannot disable " - "AVX10 instructions when AVX10.1-512 is available in GCC 15, " - "behavior will change to it will disable that part of " - "AVX512 instructions since GCC 16"); - } - else if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2)) - { - if (TARGET_EVEX512_P (opts->x_ix86_isa_flags2) - && (OPTION_MASK_ISA2_EVEX512 & opts->x_ix86_isa_flags2_explicit)) - { - if (!TARGET_AVX512F_P (opts->x_ix86_isa_flags) - || !(OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit)) - { - /* We should not emit 512 bit instructions under AVX10.1-256 - when EVEX512 is enabled w/o any AVX512 features enabled. - Disable EVEX512 bit for this. 
*/ - warning (0, "Using %<-mevex512%> without any AVX512 features " - "enabled together with AVX10.1 only will not enable " - "any AVX512 or AVX10.1-512 features, using 256 as " - "max vector size"); - opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_EVEX512; - } - else - warning (0, "Vector size conflicts between AVX10.1 and AVX512, " - "using 512 as max vector size"); - } - else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) - && !(OPTION_MASK_ISA2_EVEX512 - & opts->x_ix86_isa_flags2_explicit)) - warning (0, "Vector size conflicts between AVX10.1 and AVX512, using " - "512 as max vector size"); - else if (opts->x_ix86_no_avx512_explicit - && (((~(avx512_isa_flags & opts->x_ix86_isa_flags) - & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit))) - || ((~(avx512_isa_flags2 & opts->x_ix86_isa_flags2) - & (avx512_isa_flags2 - & opts->x_ix86_isa_flags2_explicit))))) - warning (0, "%<-mno-avx512XXX%> cannot disable AVX10 instructions " - "when AVX10 is available in GCC 15, behavior will change " - "to it will disable that part of AVX512 instructions since " - "GCC 16"); - } - else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && (OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit)) - { - if (opts->x_ix86_no_avx10_1_explicit - && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1) - & opts->x_ix86_isa_flags2_explicit)) - { - warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> cannot disable " - "AVX512 instructions when %<-mavx512XXX%> in GCC 15, " - "behavior will change to it will disable all the " - "instructions in GCC 16"); - /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is - disabled. */ - if (OPTION_MASK_ISA2_AVX10_1_256 & opts->x_ix86_isa_flags2_explicit) - { - opts->x_ix86_isa_flags = (~avx512_isa_flags - & opts->x_ix86_isa_flags) - | (avx512_isa_flags & opts->x_ix86_isa_flags - & opts->x_ix86_isa_flags_explicit); - opts->x_ix86_isa_flags2 = (~avx512_isa_flags2 - & opts->x_ix86_isa_flags2) - | (avx512_isa_flags2 & opts->x_ix86_isa_flags2 - & opts->x_ix86_isa_flags2_explicit); - } - } - } - - /* Set EVEX512 if one of the following conditions meets: - 1. AVX512 is enabled while EVEX512 is not explicitly set/unset. - 2. AVX10.1-512 is enabled. */ - if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2) - || (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))) - opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512; - - /* Enable all AVX512 related ISAs when AVX10.1 is enabled. */ - if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2)) - { - opts->x_ix86_isa_flags |= avx512_isa_flags; - opts->x_ix86_isa_flags2 |= avx512_isa_flags2; - } - /* Validate -mpreferred-stack-boundary= value or default it to PREFERRED_STACK_BOUNDARY_DEFAULT. */ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; @@ -2828,8 +2687,8 @@ ix86_option_override_internal (bool main_args_p, if (flag_nop_mcount) error ("%<-mnop-mcount%> is not compatible with this target"); #endif - if (flag_nop_mcount && flag_pic) - error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); + if (flag_nop_mcount && flag_pic && !flag_plt) + error ("%<-mnop-mcount%> is not implemented for %<-fno-plt%>"); /* Accept -msseregparm only if at least SSE support is enabled. 
*/ if (TARGET_SSEREGPARM_P (opts->x_target_flags) @@ -3049,8 +2908,7 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_move_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_move_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_move_max = PVW_AVX512; /* Align with vectorizer to avoid potential STLF issue. */ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) @@ -3076,8 +2934,7 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_store_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_store_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_store_max = PVW_AVX512; /* Align with vectorizer to avoid potential STLF issue. */ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) @@ -3374,13 +3231,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node) case 'e': if (TARGET_PREFER_AVX256) { - if (!TARGET_AVX512F || !TARGET_EVEX512) - str = "avx512f,evex512,prefer-vector-width=512"; + if (!TARGET_AVX512F) + str = "avx512f,prefer-vector-width=512"; else str = "prefer-vector-width=512"; } - else if (!TARGET_AVX512F || !TARGET_EVEX512) - str = "avx512f,evex512"; + else if (!TARGET_AVX512F) + str = "avx512f"; break; default: gcc_unreachable (); @@ -3420,19 +3277,21 @@ ix86_set_func_type (tree fndecl) interrupt function in this case. */ enum call_saved_registers_type no_callee_saved_registers = TYPE_DEFAULT_CALL_SAVED_REGISTERS; - if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + if (lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + no_callee_saved_registers = TYPE_PRESERVE_NONE; + else if ((lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + || (ix86_noreturn_no_callee_saved_registers + && TREE_THIS_VOLATILE (fndecl) + && optimize + && !optimize_debug + && (TREE_NOTHROW (fndecl) || !flag_exceptions) + && !lookup_attribute ("interrupt", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) + && !lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS; - else if (ix86_noreturn_no_callee_saved_registers - && TREE_THIS_VOLATILE (fndecl) - && optimize - && !optimize_debug - && (TREE_NOTHROW (fndecl) || !flag_exceptions) - && !lookup_attribute ("interrupt", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl))) - && !lookup_attribute ("no_caller_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) - no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP; if (cfun->machine->func_type == TYPE_UNKNOWN) { @@ -3444,9 +3303,16 @@ ix86_set_func_type (tree fndecl) "interrupt and naked attributes are not compatible"); if (no_callee_saved_registers) - error_at (DECL_SOURCE_LOCATION (fndecl), - "%qs and %qs attributes are not compatible", - "interrupt", "no_callee_saved_registers"); + { + const char *attr; + if (no_callee_saved_registers == TYPE_PRESERVE_NONE) + attr = "preserve_none"; + else + attr = "no_callee_saved_registers"; + error_at (DECL_SOURCE_LOCATION (fndecl), + "%qs and %qs attributes are not compatible", + "interrupt", attr); + } int nargs = 0; for (tree arg = DECL_ARGUMENTS (fndecl); @@ -3468,21 +3334,13 @@ ix86_set_func_type (tree fndecl) else { cfun->machine->func_type = TYPE_NORMAL; - if (lookup_attribute 
("no_caller_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) + if (no_callee_saved_registers) + cfun->machine->call_saved_registers + = no_callee_saved_registers; + else if (lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) cfun->machine->call_saved_registers = TYPE_NO_CALLER_SAVED_REGISTERS; - if (no_callee_saved_registers) - { - if (cfun->machine->call_saved_registers - == TYPE_NO_CALLER_SAVED_REGISTERS) - error_at (DECL_SOURCE_LOCATION (fndecl), - "%qs and %qs attributes are not compatible", - "no_caller_saved_registers", - "no_callee_saved_registers"); - cfun->machine->call_saved_registers - = no_callee_saved_registers; - } } } } @@ -3671,11 +3529,21 @@ ix86_set_current_function (tree fndecl) || (cfun->machine->call_saved_registers == TYPE_NO_CALLER_SAVED_REGISTERS)) { - /* Don't allow SSE, MMX nor x87 instructions since they - may change processor state. */ + /* Don't allow AVX, AVX512, MMX nor x87 instructions since they + may change processor state. Don't allow SSE instructions in + exception/interrupt service routines. */ const char *isa; if (TARGET_SSE) - isa = "SSE"; + { + if (TARGET_AVX512F) + isa = "AVX512"; + else if (TARGET_AVX) + isa = "AVX"; + else if (cfun->machine->func_type != TYPE_NORMAL) + isa = "SSE"; + else + isa = NULL; + } else if (TARGET_MMX) isa = "MMX/3Dnow"; else if (TARGET_80387) @@ -4100,9 +3968,50 @@ ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, } static tree -ix86_handle_call_saved_registers_attribute (tree *, tree, tree, +ix86_handle_call_saved_registers_attribute (tree *node, tree name, tree, int, bool *) { + const char *attr1 = nullptr; + const char *attr2 = nullptr; + + if (is_attribute_p ("no_callee_saved_registers", name)) + { + /* Disallow preserve_none and no_caller_saved_registers + attributes. */ + attr1 = "no_callee_saved_registers"; + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node))) + attr2 = "preserve_none"; + else if (lookup_attribute ("no_caller_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_caller_saved_registers"; + } + else if (is_attribute_p ("no_caller_saved_registers", name)) + { + /* Disallow preserve_none and no_callee_saved_registers + attributes. */ + attr1 = "no_caller_saved_registers"; + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node))) + attr2 = "preserve_none"; + else if (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_callee_saved_registers"; + } + else if (is_attribute_p ("preserve_none", name)) + { + /* Disallow no_callee_saved_registers and no_caller_saved_registers + attributes. 
*/ + attr1 = "preserve_none"; + if (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_caller_saved_registers"; + else if (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (*node))) + attr2 = "no_callee_saved_registers"; + } + + if (attr2) + error ("%qs and %qs attributes are not compatible", attr1, attr2); + return NULL_TREE; } @@ -4264,6 +4173,8 @@ static const attribute_spec ix86_gnu_attributes[] = ix86_handle_interrupt_attribute, NULL }, { "no_caller_saved_registers", 0, 0, false, true, true, false, ix86_handle_call_saved_registers_attribute, NULL }, + { "preserve_none", 0, 0, false, true, true, true, + ix86_handle_call_saved_registers_attribute, NULL }, { "no_callee_saved_registers", 0, 0, false, true, true, true, ix86_handle_call_saved_registers_attribute, NULL }, { "naked", 0, 0, true, false, false, false, diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 39f8bc6..06f0288 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -35,5 +35,6 @@ along with GCC; see the file COPYING3. If not see PR116174. */ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); + INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load); INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index bea3fd4..69bc0ee 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -186,6 +186,7 @@ extern void ix86_expand_v2di_ashiftrt (rtx[]); extern rtx ix86_replace_reg_with_reg (rtx, rtx, rtx); extern rtx ix86_find_base_term (rtx); extern bool ix86_check_movabs (rtx, int); +extern bool ix86_check_movs (rtx, int); extern bool ix86_check_no_addr_space (rtx); extern void ix86_split_idivmod (machine_mode, rtx[], bool); extern bool ix86_hardreg_mov_ok (rtx, rtx); @@ -198,6 +199,7 @@ extern int ix86_attr_length_vex_default (rtx_insn *, bool, bool); extern rtx ix86_libcall_value (machine_mode); extern bool ix86_function_arg_regno_p (int); extern void ix86_asm_output_function_label (FILE *, const char *, tree); +extern void ix86_asm_output_labelref (FILE *, const char *, const char *); extern void ix86_call_abi_override (const_tree); extern int ix86_reg_parm_stack_space (const_tree); @@ -280,6 +282,7 @@ extern tree ix86_valid_target_attribute_tree (tree, tree, struct gcc_options *, struct gcc_options *, bool); extern unsigned int ix86_get_callcvt (const_tree); +extern bool ix86_type_no_callee_saved_registers_p (const_tree); #endif @@ -427,12 +430,21 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); +extern rtl_opt_pass *make_pass_remove_redundant_vector_load + (gcc::context *); extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); extern bool ix86_has_no_direct_extern_access; extern bool ix86_rpad_gate (); +extern sbitmap ix86_get_separate_components (void); +extern sbitmap ix86_components_for_bb (basic_block); +extern void ix86_disqualify_components (sbitmap, edge, sbitmap, bool); +extern void ix86_emit_prologue_components (sbitmap); +extern void ix86_emit_epilogue_components (sbitmap); +extern void ix86_set_handled_components (sbitmap); + /* In i386-expand.cc. 
*/ bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*, HOST_WIDE_INT*); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4f8380c4..313522b 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see #include "i386-features.h" #include "function-abi.h" #include "rtl-error.h" +#include "gimple-pretty-print.h" /* This file should be included last. */ #include "target-def.h" @@ -334,6 +335,14 @@ static int const x86_64_ms_abi_int_parameter_registers[4] = CX_REG, DX_REG, R8_REG, R9_REG }; +/* Similar as Clang's preserve_none function parameter passing. + NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p. */ + +static int const x86_64_preserve_none_int_parameter_registers[6] = +{ + R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG +}; + static int const x86_64_int_return_registers[4] = { AX_REG, DX_REG, DI_REG, SI_REG @@ -458,6 +467,10 @@ int ix86_arch_specified; indirect thunk pushes the return address onto stack, destroying red-zone. + NB: Don't use red-zone for functions with no_caller_saved_registers + and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small + for 31 GPRs or 15 GPRs + 16 XMM registers. + TODO: If we can reserve the first 2 WORDs, for PUSH and, another for CALL, in red-zone, we can allow local indirect jumps with indirect thunk. */ @@ -467,6 +480,9 @@ ix86_using_red_zone (void) { return (TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI + && ((!TARGET_APX_EGPR && !TARGET_SSE) + || (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) && (!cfun->machine->has_local_indirect_jump || cfun->machine->indirect_branch_type == indirect_branch_keep)); } @@ -891,6 +907,18 @@ x86_64_elf_unique_section (tree decl, int reloc) default_unique_section (decl, reloc); } +/* Return true if TYPE has no_callee_saved_registers or preserve_none + attribute. */ + +bool +ix86_type_no_callee_saved_registers_p (const_tree type) +{ + return (lookup_attribute ("no_callee_saved_registers", + TYPE_ATTRIBUTES (type)) != NULL + || lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (type)) != NULL); +} + #ifdef COMMON_ASM_OP #ifndef LARGECOMM_SECTION_ASM_OP @@ -1012,11 +1040,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) /* Sibling call isn't OK if callee has no callee-saved registers and the calling function has callee-saved registers. */ - if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS - && (cfun->machine->call_saved_registers - != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP) - && lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type))) + if ((cfun->machine->call_saved_registers + != TYPE_NO_CALLEE_SAVED_REGISTERS) + && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE + && ix86_type_no_callee_saved_registers_p (type)) return false; /* If outgoing reg parm stack space changes, we cannot do sibcall. */ @@ -1181,10 +1208,16 @@ ix86_comp_type_attributes (const_tree type1, const_tree type2) != ix86_function_regparm (type2, NULL)) return 0; - if (lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type1)) - != lookup_attribute ("no_callee_saved_registers", - TYPE_ATTRIBUTES (type2))) + if (ix86_type_no_callee_saved_registers_p (type1) + != ix86_type_no_callee_saved_registers_p (type2)) + return 0; + + /* preserve_none attribute uses a different calling convention is + only for 64-bit. 
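Illustration only, assuming a 64-bit target: with the attribute below, the first integer arguments arrive in r12, r13, r14, r15, rdi, rsi (per x86_64_preserve_none_int_parameter_registers above) rather than the default rdi, rsi, rdx, rcx, r8, r9, and the callee preserves no registers except the frame pointer when one is needed, so declaration and definition must agree on the attribute.

// Hypothetical example of the new calling convention.
extern int cb (int a, int b) __attribute__ ((preserve_none));

__attribute__ ((preserve_none)) int
cb (int a, int b)
{
  return a + b;   // a in %r12 and b in %r13 under this convention.
}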
*/ + if (TARGET_64BIT + && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1)) + != lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (type2)))) return 0; return 1; @@ -1546,7 +1579,10 @@ ix86_function_arg_regno_p (int regno) if (call_abi == SYSV_ABI && regno == AX_REG) return true; - if (call_abi == MS_ABI) + if (cfun + && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else if (call_abi == MS_ABI) parm_regs = x86_64_ms_abi_int_parameter_registers; else parm_regs = x86_64_int_parameter_registers; @@ -1709,6 +1745,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname, } } +/* Output a user-defined label. In AT&T syntax, registers are prefixed + with %, so labels require no punctuation. In Intel syntax, registers + are unprefixed, so labels may clash with registers or other operators, + and require quoting. */ +void +ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label) +{ + if (ASSEMBLER_DIALECT == ASM_ATT) + fprintf (file, "%s%s", prefix, label); + else + fprintf (file, "\"%s%s\"", prefix, label); +} + /* Implementation of call abi switching target hook. Specific to FNDECL the specific call register sets are set. See also ix86_conditional_register_usage for more details. */ @@ -1788,8 +1837,7 @@ ix86_init_pic_reg (void) add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); } - seq = get_insns (); - end_sequence (); + seq = end_sequence (); entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); insert_insn_on_edge (seq, entry_edge); @@ -1816,6 +1864,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ memset (cum, 0, sizeof (*cum)); + tree preserve_none_type; if (fndecl) { target = cgraph_node::get (fndecl); @@ -1824,12 +1873,24 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ target = target->function_symbol (); local_info_node = cgraph_node::local_info_node (target->decl); cum->call_abi = ix86_function_abi (target->decl); + preserve_none_type = TREE_TYPE (target->decl); } else - cum->call_abi = ix86_function_abi (fndecl); + { + cum->call_abi = ix86_function_abi (fndecl); + preserve_none_type = TREE_TYPE (fndecl); + } } else - cum->call_abi = ix86_function_type_abi (fntype); + { + cum->call_abi = ix86_function_type_abi (fntype); + preserve_none_type = fntype; + } + cum->preserve_none_abi + = (preserve_none_type + && (lookup_attribute ("preserve_none", + TYPE_ATTRIBUTES (preserve_none_type)) + != nullptr)); cum->caller = caller; @@ -1991,8 +2052,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) && GET_MODE_INNER (mode) == innermode) { - if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512) - && !TARGET_IAMCU) + if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) { static bool warnedavx512f; static bool warnedavx512f_ret; @@ -3403,9 +3463,15 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, break; } + const int *parm_regs; + if (cum->preserve_none_abi) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + return construct_container (mode, orig_mode, type, 0, cum->nregs, cum->sse_nregs, - &x86_64_int_parameter_registers [cum->regno], + &parm_regs[cum->regno], cum->sse_regno); } @@ -4415,7 +4481,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) /* AVX512F values are returned in ZMM0 if available. 
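Illustration only of the case this hunk simplifies: a 64-byte vector return value stays in a register whenever AVX512F is enabled (the separate EVEX512 check is gone), and goes through memory otherwise.

typedef int v16si __attribute__ ((vector_size (64)));

v16si
pass_through (v16si x)
{
  return x;   // In ZMM0 with -mavx512f, via a hidden return pointer without it.
}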
*/ if (size == 64) - return !TARGET_AVX512F || !TARGET_EVEX512; + return !TARGET_AVX512F; } if (mode == XFmode) @@ -4570,6 +4636,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) if (max > X86_64_REGPARM_MAX) max = X86_64_REGPARM_MAX; + const int *parm_regs; + if (cum->preserve_none_abi) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else + parm_regs = x86_64_int_parameter_registers; + for (i = cum->regno; i < max; i++) { mem = gen_rtx_MEM (word_mode, @@ -4577,8 +4649,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) MEM_NOTRAP_P (mem) = 1; set_mem_alias_set (mem, set); emit_move_insn (mem, - gen_rtx_REG (word_mode, - x86_64_int_parameter_registers[i])); + gen_rtx_REG (word_mode, parm_regs[i])); } if (ix86_varargs_fpr_size) @@ -4732,8 +4803,7 @@ ix86_va_start (tree valist, rtx nextarg) start_sequence (); emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); push_topmost_sequence (); emit_insn_after (seq, entry_of_function ()); @@ -5173,6 +5243,27 @@ ix86_check_movabs (rtx insn, int opnum) return volatile_ok || !MEM_VOLATILE_P (mem); } +/* Return true if XVECEXP idx of INSN satisfies MOVS arguments. */ +bool +ix86_check_movs (rtx insn, int idx) +{ + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + + rtx set = XVECEXP (pat, 0, idx); + gcc_assert (GET_CODE (set) == SET); + + rtx dst = SET_DEST (set); + gcc_assert (MEM_P (dst)); + + rtx src = SET_SRC (set); + gcc_assert (MEM_P (src)); + + return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)) + && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)) + || Pmode == word_mode)); +} + /* Return false if INSN contains a MEM with a non-default address space. */ bool ix86_check_no_addr_space (rtx insn) @@ -5349,7 +5440,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode) switch (GET_MODE_SIZE (mode)) { case 64: - if (TARGET_AVX512F && TARGET_EVEX512) + if (TARGET_AVX512F) return 2; break; case 32: @@ -5402,10 +5493,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vpxord\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxord\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxord\t%g0, %g0, %g0"; } return "vpxor\t%x0, %x0, %x0"; @@ -5421,19 +5510,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vxorpd\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vxorpd\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vxorpd\t%g0, %g0, %g0"; } else { if (TARGET_AVX512VL) return "vpxorq\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxorq\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxorq\t%g0, %g0, %g0"; } } return "vxorpd\t%x0, %x0, %x0"; @@ -5450,19 +5535,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vxorps\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vxorps\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vxorps\t%g0, %g0, %g0"; } else { if (TARGET_AVX512VL) return "vpxord\t%x0, %x0, %x0"; - else if (TARGET_EVEX512) - return "vpxord\t%g0, %g0, %g0"; else - gcc_unreachable (); + return "vpxord\t%g0, %g0, %g0"; } } return "vxorps\t%x0, %x0, %x0"; @@ -5483,7 +5564,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) case MODE_XI: case MODE_V8DF: case MODE_V16SF: - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; case MODE_OI: @@ -5499,10 +5580,8 @@ 
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (TARGET_AVX512VL) return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; - else if (TARGET_EVEX512) - return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; else - gcc_unreachable (); + return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; } return (TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" @@ -5516,7 +5595,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) { if (GET_MODE_SIZE (mode) == 64) { - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpcmpeqd\t%t0, %t0, %t0"; } else if (GET_MODE_SIZE (mode) == 32) @@ -5528,7 +5607,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) } else if (vector_all_ones_zero_extend_quarter_operand (x, mode)) { - gcc_assert (TARGET_AVX512F && TARGET_EVEX512); + gcc_assert (TARGET_AVX512F); return "vpcmpeqd\t%x0, %x0, %x0"; } @@ -5639,8 +5718,6 @@ ix86_get_ssemov (rtx *operands, unsigned size, || memory_operand (operands[1], mode)) gcc_unreachable (); size = 64; - /* We need TARGET_EVEX512 to move into zmm register. */ - gcc_assert (TARGET_EVEX512); switch (type) { case opcode_int: @@ -5679,7 +5756,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -5721,7 +5798,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu8" : "%vmovdqu") : "%vmovdqa"); @@ -5741,7 +5818,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, : "%vmovaps"); else opcode = (misaligned_p - ? (TARGET_AVX512BW + ? (TARGET_AVX512BW && evex_reg_p ? "vmovdqu16" : "%vmovdqu") : "%vmovdqa"); @@ -6449,7 +6526,7 @@ output_set_got (rtx dest, rtx label) xops[0] = dest; - if (TARGET_VXWORKS_RTP && flag_pic) + if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic) { /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ xops[2] = gen_rtx_MEM (Pmode, @@ -6694,9 +6771,7 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) || !frame_pointer_needed)); case TYPE_NO_CALLEE_SAVED_REGISTERS: - return false; - - case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP: + case TYPE_PRESERVE_NONE: if (regno != HARD_FRAME_POINTER_REGNUM) return false; break; @@ -6773,7 +6848,9 @@ ix86_nsaved_sseregs (void) int nregs = 0; int regno; - if (!TARGET_64BIT_MS_ABI) + if (!TARGET_64BIT_MS_ABI + && (cfun->machine->call_saved_registers + != TYPE_NO_CALLER_SAVED_REGISTERS)) return 0; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) @@ -6881,6 +6958,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs) && (nregs + aligned) >= 3; } +/* Check if push/pop should be used to save/restore registers. */ +static bool +save_regs_using_push_pop (HOST_WIDE_INT to_allocate) +{ + return ((!to_allocate && cfun->machine->frame.nregs <= 1) + || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) + /* If static stack checking is enabled and done with probes, + the registers need to be saved before allocating the frame. */ + || flag_stack_check == STATIC_BUILTIN_STACK_CHECK + /* If stack clash probing needs a loop, then it needs a + scratch register. But the returned register is only guaranteed + to be safe to use after register saves are complete. 
So if + stack clash protections are enabled and the allocated frame is + larger than the probe interval, then use pushes to save + callee saved registers. */ + || (flag_stack_clash_protection + && !ix86_target_stack_probe () + && to_allocate > get_probe_interval ())); +} + /* Fill structure ix86_frame about frame of currently computed function. */ static void @@ -6961,12 +7058,18 @@ ix86_compute_frame_layout (void) gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); gcc_assert (preferred_alignment <= stack_alignment_needed); - /* The only ABI saving SSE regs should be 64-bit ms_abi. */ - gcc_assert (TARGET_64BIT || !frame->nsseregs); + /* The only ABI saving SSE regs should be 64-bit ms_abi or with + no_caller_saved_registers attribue. */ + gcc_assert (TARGET_64BIT + || (cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + || !frame->nsseregs); if (TARGET_64BIT && m->call_ms2sysv) { gcc_assert (stack_alignment_needed >= 16); - gcc_assert (!frame->nsseregs); + gcc_assert ((cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + || !frame->nsseregs); } /* For SEH we have to limit the amount of code movement into the prologue. @@ -7165,20 +7268,7 @@ ix86_compute_frame_layout (void) /* Size prologue needs to allocate. */ to_allocate = offset - frame->sse_reg_save_offset; - if ((!to_allocate && frame->nregs <= 1) - || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) - /* If static stack checking is enabled and done with probes, - the registers need to be saved before allocating the frame. */ - || flag_stack_check == STATIC_BUILTIN_STACK_CHECK - /* If stack clash probing needs a loop, then it needs a - scratch register. But the returned register is only guaranteed - to be safe to use after register saves are complete. So if - stack clash protections are enabled and the allocated frame is - larger than the probe interval, then use pushes to save - callee saved registers. */ - || (flag_stack_clash_protection - && !ix86_target_stack_probe () - && to_allocate > get_probe_interval ())) + if (save_regs_using_push_pop (to_allocate)) frame->save_regs_using_mov = false; if (ix86_using_red_zone () @@ -7636,7 +7726,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) { - ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + /* Skip registers, already processed by shrink wrap separate. */ + if (!cfun->machine->reg_is_wrapped_separately[regno]) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); cfa_offset -= UNITS_PER_WORD; } } @@ -7729,8 +7821,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, add_frame_related_expr = true; } - insn = emit_insn (gen_pro_epilogue_adjust_stack_add - (Pmode, dest, src, addend)); + /* Shrink wrap separate may insert prologue between TEST and JMP. In order + not to affect EFlags, emit add without reg clobbering. 
*/ + if (crtl->shrink_wrapped_separate) + insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc + (Pmode, dest, src, addend)); + else + insn = emit_insn (gen_pro_epilogue_adjust_stack_add + (Pmode, dest, src, addend)); + if (style >= 0) ix86_add_queued_cfa_restore_notes (insn); @@ -7914,6 +8013,15 @@ ix86_update_stack_boundary (void) if (ix86_tls_descriptor_calls_expanded_in_cfun && crtl->preferred_stack_boundary < 128) crtl->preferred_stack_boundary = 128; + + /* For 32-bit MS ABI, both the incoming and preferred stack boundaries + are 32 bits, but if force_align_arg_pointer is specified, it should + prefer 128 bits for a backward-compatibility reason, which is also + what the doc suggests. */ + if (lookup_attribute ("force_align_arg_pointer", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) + && crtl->preferred_stack_boundary < 128) + crtl->preferred_stack_boundary = 128; } /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is @@ -7944,8 +8052,7 @@ ix86_get_drap_rtx (void) start_sequence (); drap_vreg = copy_to_reg (arg_ptr); - seq = get_insns (); - end_sequence (); + seq = end_sequence (); insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); if (!optimize) @@ -8466,6 +8573,128 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Data passed to ix86_update_stack_alignment. */ +struct stack_access_data +{ + /* The stack access register. */ + const_rtx reg; + /* Pointer to stack alignment. */ + unsigned int *stack_alignment; +}; + +/* Update the maximum stack slot alignment from memory alignment in PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment if the memory is referenced by the stack access register. */ + stack_access_data *p = (stack_access_data *) data; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + { + auto op = *iter; + if (MEM_P (op)) + { + if (reg_mentioned_p (p->reg, XEXP (op, 0))) + { + unsigned int alignment = MEM_ALIGN (op); + + if (alignment > *p->stack_alignment) + *p->stack_alignment = alignment; + break; + } + else + iter.skip_subrtxes (); + } + } +} + +/* Helper function for ix86_find_all_reg_uses. */ + +static void +ix86_find_all_reg_uses_1 (HARD_REG_SET &regset, + rtx set, unsigned int regno, + auto_bitmap &worklist) +{ + rtx dest = SET_DEST (set); + + if (!REG_P (dest)) + return; + + /* Reject non-Pmode modes. */ + if (GET_MODE (dest) != Pmode) + return; + + unsigned int dst_regno = REGNO (dest); + + if (TEST_HARD_REG_BIT (regset, dst_regno)) + return; + + const_rtx src = SET_SRC (set); + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, src, ALL) + { + auto op = *iter; + + if (MEM_P (op)) + iter.skip_subrtxes (); + + if (REG_P (op) && REGNO (op) == regno) + { + /* Add this register to register set. */ + add_to_hard_reg_set (&regset, Pmode, dst_regno); + bitmap_set_bit (worklist, dst_regno); + break; + } + } +} + +/* Find all registers defined with register REGNO.
*/ + +static void +ix86_find_all_reg_uses (HARD_REG_SET &regset, + unsigned int regno, auto_bitmap &worklist) +{ + for (df_ref ref = DF_REG_USE_CHAIN (regno); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + unsigned int ref_regno = DF_REF_REGNO (ref); + + rtx set = single_set (insn); + if (set) + { + ix86_find_all_reg_uses_1 (regset, set, + ref_regno, worklist); + continue; + } + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx exp = XVECEXP (pat, 0, i); + + if (GET_CODE (exp) == SET) + ix86_find_all_reg_uses_1 (regset, exp, + ref_regno, worklist); + } + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. */ @@ -8484,10 +8713,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8499,27 +8724,67 @@ set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer.
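Illustration only, with hypothetical register numbers: the worklist also has to follow pointers derived from the stack or frame pointer, e.g.

   (set (reg:DI 100) (plus:DI (reg/f:DI 7 sp) (const_int 32)))
   (set (mem:V4SF (reg:DI 100) [0 S16 A128]) (reg:V4SF 101))

Register 100 is defined from the stack pointer, so the MEM_ALIGN of the store through it (128 bits here) is taken into account when computing the maximum stack slot alignment.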
*/ + auto_bitmap worklist; + + add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM); + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); + + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); + } + + unsigned int regno; + + do + { + regno = bitmap_clear_first_set_bit (worklist); + ix86_find_all_reg_uses (stack_slot_access, regno, worklist); + } + while (!bitmap_empty_p (worklist)); + + hard_reg_set_iterator hrsi; + stack_access_data data; + + data.stack_alignment = &stack_alignment; + + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi) + for (df_ref ref = DF_REG_USE_CHAIN (regno); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + data.reg = DF_REF_REG (ref); + note_stores (insn, ix86_update_stack_alignment, &data); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which @@ -9029,11 +9294,22 @@ ix86_expand_prologue (void) doing this if we have to probe the stack; at least on x86_64 the stack probe can turn into a call that clobbers a red zone location. */ else if (ix86_using_red_zone () - && (! TARGET_STACK_PROBE - || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) + && (! TARGET_STACK_PROBE + || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) { + HOST_WIDE_INT allocate_offset; + if (crtl->shrink_wrapped_separate) + { + allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset; + + /* Adjust the total offset at the beginning of the function. */ + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (allocate_offset), -1, + m->fs.cfa_reg == stack_pointer_rtx); + m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset; + } + ix86_emit_save_regs_using_mov (frame.reg_save_offset); - cfun->machine->red_zone_used = true; int_registers_saved = true; } } @@ -9611,30 +9887,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) { - rtx reg = gen_rtx_REG (word_mode, regno); - rtx mem; - rtx_insn *insn; - - mem = choose_baseaddr (cfa_offset, NULL); - mem = gen_frame_mem (word_mode, mem); - insn = emit_move_insn (reg, mem); - if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) + /* Skip registers, already processed by shrink wrap separate. */ + if (!cfun->machine->reg_is_wrapped_separately[regno]) { - /* Previously we'd represented the CFA as an expression - like *(%ebp - 8). We've just popped that value from - the stack, which means we need to reset the CFA to - the drap register. This will remain until we restore - the stack pointer. */ - add_reg_note (insn, REG_CFA_DEF_CFA, reg); - RTX_FRAME_RELATED_P (insn) = 1; + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; - /* This means that the DRAP register is valid for addressing. */ - m->fs.drap_valid = true; - } - else - ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + mem = choose_baseaddr (cfa_offset, NULL); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + + if (m->fs.cfa_reg == crtl->drap_reg + && regno == REGNO (crtl->drap_reg)) + { + /* Previously we'd represented the CFA as an expression + like *(%ebp - 8). 
We've just popped that value from + the stack, which means we need to reset the CFA to + the drap register. This will remain until we restore + the stack pointer. */ + add_reg_note (insn, REG_CFA_DEF_CFA, reg); + RTX_FRAME_RELATED_P (insn) = 1; + /* DRAP register is valid for addressing. */ + m->fs.drap_valid = true; + } + else + ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + } cfa_offset -= UNITS_PER_WORD; } } @@ -9913,10 +10194,11 @@ ix86_expand_epilogue (int style) less work than reloading sp and popping the register. */ else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) restore_regs_via_mov = true; - else if (TARGET_EPILOGUE_USING_MOVE - && cfun->machine->use_fast_prologue_epilogue - && (frame.nregs > 1 - || m->fs.sp_offset != reg_save_offset)) + else if (crtl->shrink_wrapped_separate + || (TARGET_EPILOGUE_USING_MOVE + && cfun->machine->use_fast_prologue_epilogue + && (frame.nregs > 1 + || m->fs.sp_offset != reg_save_offset))) restore_regs_via_mov = true; else if (frame_pointer_needed && !frame.nregs @@ -9930,6 +10212,9 @@ ix86_expand_epilogue (int style) else restore_regs_via_mov = false; + if (crtl->shrink_wrapped_separate) + gcc_assert (restore_regs_via_mov); + if (restore_regs_via_mov || frame.nsseregs) { /* Ensure that the entire register save area is addressable via @@ -9982,6 +10267,7 @@ ix86_expand_epilogue (int style) gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); gcc_assert (!crtl->drap_reg); gcc_assert (!frame.nregs); + gcc_assert (!crtl->shrink_wrapped_separate); } else if (restore_regs_via_mov) { @@ -9996,6 +10282,8 @@ ix86_expand_epilogue (int style) rtx sa = EH_RETURN_STACKADJ_RTX; rtx_insn *insn; + gcc_assert (!crtl->shrink_wrapped_separate); + /* Stack realignment doesn't work with eh_return. */ if (crtl->stack_realign_needed) sorry ("Stack realignment not supported with " @@ -11177,6 +11465,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) x = XVECEXP (x, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + x = XVECEXP (x, 0, 0); + return GET_CODE (x) == SYMBOL_REF; default: return false; } @@ -11224,7 +11515,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) case E_OImode: case E_XImode: if (!standard_sse_constant_p (x, mode) - && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512 + && GET_MODE_SIZE (TARGET_AVX512F ? XImode : (TARGET_AVX ? 
OImode @@ -11313,6 +11604,9 @@ legitimate_pic_operand_p (rtx x) x = XVECEXP (inner, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_SECREL32: + x = XVECEXP (inner, 0, 0); + return GET_CODE (x) == SYMBOL_REF; case UNSPEC_MACHOPIC_OFFSET: return legitimate_pic_address_disp_p (x); default: @@ -11493,6 +11787,9 @@ legitimate_pic_address_disp_p (rtx disp) disp = XVECEXP (disp, 0, 0); return (GET_CODE (disp) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + disp = XVECEXP (disp, 0, 0); + return GET_CODE (disp) == SYMBOL_REF; } return false; @@ -11770,6 +12067,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, case UNSPEC_INDNTPOFF: case UNSPEC_NTPOFF: case UNSPEC_DTPOFF: + case UNSPEC_SECREL32: break; default: @@ -11795,7 +12093,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF - && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32)) /* Non-constant pic memory reference. */ return false; } @@ -11946,7 +12245,7 @@ legitimize_pic_address (rtx orig, rtx reg) else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) /* We can't always use @GOTOFF for text labels on VxWorks, see gotoff_operand. */ - || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) + || (TARGET_VXWORKS_VAROFF && GET_CODE (addr) == LABEL_REF)) { #if TARGET_PECOFF rtx tmp = legitimize_pe_coff_symbol (addr, true); @@ -12119,6 +12418,24 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg) return tp; } +/* Construct the SYMBOL_REF for the _tls_index symbol. */ + +static GTY(()) rtx ix86_tls_index_symbol; + +#if TARGET_WIN32_TLS +static rtx +ix86_tls_index (void) +{ + if (!ix86_tls_index_symbol) + ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index"); + + if (flag_pic) + return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL)); + else + return ix86_tls_index_symbol; +} +#endif + /* Construct the SYMBOL_REF for the tls_get_addr function. */ static GTY(()) rtx ix86_tls_symbol; @@ -12177,6 +12494,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) machine_mode tp_mode = Pmode; int type; +#if TARGET_WIN32_TLS + off = gen_const_mem (SImode, ix86_tls_index ()); + set_mem_alias_set (off, GOT_ALIAS_SET); + + tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44)); + set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG); + + if (TARGET_64BIT) + off = convert_to_mode (Pmode, off, 1); + + base = force_reg (Pmode, off); + tp = copy_to_mode_reg (Pmode, tp); + + tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD)))); + set_mem_alias_set (tp, GOT_ALIAS_SET); + + base = force_reg (Pmode, tp); + + return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))); +#else /* Fall back to global dynamic model if tool chain cannot support local dynamic. 
*/ if (TARGET_SUN_TLS && !TARGET_64BIT @@ -12225,13 +12562,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx rdi = gen_rtx_REG (Pmode, DI_REG); rtx_insn *insns; start_sequence (); emit_call_insn - (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr)); - insns = get_insns (); - end_sequence (); + (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi)); + insns = end_sequence (); if (GET_MODE (x) != Pmode) x = gen_rtx_ZERO_EXTEND (Pmode, x); @@ -12279,14 +12616,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT) { rtx rax = gen_rtx_REG (Pmode, AX_REG); + rtx rdi = gen_rtx_REG (Pmode, DI_REG); rtx_insn *insns; rtx eqv; start_sequence (); emit_call_insn - (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr)); - insns = get_insns (); - end_sequence (); + (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi)); + insns = end_sequence (); /* Attach a unique REG_EQUAL, to allow the RTL optimizers to share the LD_BASE result with other LD model accesses. */ @@ -12399,6 +12736,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) } return dest; +#endif } /* Return true if the TLS address requires insn using integer registers. @@ -12868,6 +13206,9 @@ output_pic_addr_const (FILE *file, rtx x, int code) case UNSPEC_INDNTPOFF: fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: putc ('-', file); @@ -12893,7 +13234,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x) { fputs (ASM_LONG, file); output_addr_const (file, x); +#if TARGET_WIN32_TLS + fputs ("@secrel32", file); +#else fputs ("@dtpoff", file); +#endif switch (size) { case 4: @@ -13127,7 +13472,7 @@ ix86_delegitimize_address_1 (rtx x, bool base_term_p) else if (base_term_p && pic_offset_table_rtx && !TARGET_MACHO - && !TARGET_VXWORKS_RTP) + && !TARGET_VXWORKS_VAROFF) { rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); @@ -13552,10 +13897,11 @@ print_reg (rtx x, int code, FILE *file) H -- print a memory address offset by 8; used for sse high-parts Y -- print condition for XOP pcom* instruction. V -- print naked full integer register name without %. + v -- print segment override prefix + -- print a branch hint as 'cs' or 'ds' prefix ; -- print a semicolon (after prefixes due to bug in older gas). ~ -- print "i" if TARGET_AVX2, "f" otherwise. - ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode + ^ -- print addr32 prefix if Pmode != word_mode M -- print addr32 prefix for TARGET_X32 with VSIB address. ! -- print NOTRACK prefix for jxx/call/ret instructions if required. N -- print maskz if it's constant 0 operand. 
@@ -14057,6 +14403,28 @@ ix86_print_operand (FILE *file, rtx x, int code) return; + case 'v': + if (MEM_P (x)) + { + switch (MEM_ADDR_SPACE (x)) + { + case ADDR_SPACE_GENERIC: + break; + case ADDR_SPACE_SEG_FS: + fputs ("fs ", file); + break; + case ADDR_SPACE_SEG_GS: + fputs ("gs ", file); + break; + default: + gcc_unreachable (); + } + } + else + output_operand_lossage ("operand is not a memory reference, " + "invalid operand code 'v'"); + return; + case '*': if (ASSEMBLER_DIALECT == ASM_ATT) putc ('*', file); @@ -14131,7 +14499,7 @@ ix86_print_operand (FILE *file, rtx x, int code) return; case '^': - if (TARGET_64BIT && Pmode != word_mode) + if (Pmode != word_mode) fputs ("addr32 ", file); return; @@ -14646,6 +15014,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) output_addr_const (file, op); fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + output_addr_const (file, op); + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: output_addr_const (file, op); @@ -15500,7 +15872,7 @@ ix86_output_addr_diff_elt (FILE *file, int value, int rel) gcc_assert (!TARGET_64BIT); #endif /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ - if (TARGET_64BIT || TARGET_VXWORKS_RTP) + if (TARGET_64BIT || TARGET_VXWORKS_VAROFF) fprintf (file, "%s%s%d-%s%d\n", directive, LPREFIX, value, LPREFIX, rel); #if TARGET_MACHO @@ -17898,9 +18270,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) if (cum->decl && !TREE_PUBLIC (cum->decl)) return; - const_tree ctx = get_ultimate_context (cum->decl); - if (ctx != NULL_TREE - && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) + tree decl = cum->decl; + if (!decl) + /* If we don't know the target, look at the current TU. */ + decl = current_function_decl; + + const_tree ctx = get_ultimate_context (decl); + if (ctx == NULL_TREE + || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) return; /* If the actual size of the type is zero, then there is no change @@ -20037,14 +20414,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype, { bool si; enum ix86_builtins code; - const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype)); if (!TARGET_AVX512F) return NULL_TREE; - if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64) - return NULL_TREE; - if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u) ? !TARGET_USE_SCATTER_2PARTS : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u) @@ -20787,7 +21160,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to, return true; /* x87 registers can't do subreg at all, as all values are reformatted - to extended precision. */ + to extended precision. + + ??? middle-end queries mode changes for ALL_REGS and this makes + vec_series_lowpart_p to always return false. We probably should + restrict this to modes supported by i387 and check if it is enabled. */ if (MAYBE_FLOAT_CLASS_P (regclass)) return false; @@ -21162,7 +21539,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - any of 512-bit wide vector mode - any scalar mode. */ if (TARGET_AVX512F - && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512) + && ((VALID_AVX512F_REG_OR_XI_MODE (mode)) || VALID_AVX512F_SCALAR_MODE (mode))) return true; @@ -21333,19 +21710,20 @@ ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) return mode1 == SFmode; /* If MODE2 is only appropriate for an SSE register, then tie with - any other mode acceptable to SSE registers. 
*/ - if (GET_MODE_SIZE (mode2) == 64 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 64 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - if (GET_MODE_SIZE (mode2) == 32 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 32 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - if (GET_MODE_SIZE (mode2) == 16 + any vector modes or scalar floating point modes acceptable to SSE + registers, excluding scalar integer modes with SUBREG: + (subreg:QI (reg:TI 99) 0)) + (subreg:HI (reg:TI 99) 0)) + (subreg:SI (reg:TI 99) 0)) + (subreg:DI (reg:TI 99) 0)) + to avoid unnecessary move from SSE register to integer register. + */ + if (GET_MODE_SIZE (mode2) >= 16 + && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2) + || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1)) + && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2))) && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) - return (GET_MODE_SIZE (mode1) == 16 - && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); + return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1); /* If MODE2 is appropriate for an MMX register, then tie with any other mode acceptable to MMX registers. */ @@ -21403,7 +21781,7 @@ ix86_set_reg_reg_cost (machine_mode mode) case MODE_VECTOR_INT: case MODE_VECTOR_FLOAT: - if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) + if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) @@ -21464,7 +21842,7 @@ ix86_widen_mult_cost (const struct processor_costs *cost, /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend, require extra 4 mul, 4 add, 4 cmp and 2 shift. */ if (!TARGET_SSE4_1 && !uns_p) - extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4 + extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4 + cost->sse_op * 2; /* Fallthru. */ case V4DImode: @@ -21514,11 +21892,11 @@ ix86_multiplication_cost (const struct processor_costs *cost, else if (TARGET_AVX2) nops += 2; else if (TARGET_XOP) - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; else { nops += 1; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } goto do_qimode; @@ -21537,13 +21915,13 @@ ix86_multiplication_cost (const struct processor_costs *cost, { nmults += 1; nops += 2; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } else { nmults += 1; nops += 4; - extra += cost->sse_load[2]; + extra += COSTS_N_INSNS (cost->sse_load[2]) / 2; } goto do_qimode; @@ -21556,14 +21934,16 @@ ix86_multiplication_cost (const struct processor_costs *cost, { nmults += 1; nops += 4; - extra += cost->sse_load[3] * 2; + /* 2 loads, so no division by 2. */ + extra += COSTS_N_INSNS (cost->sse_load[3]); } goto do_qimode; case V64QImode: nmults = 2; nops = 9; - extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2; + /* 2 loads of each size, so no division by 2. */ + extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]); do_qimode: return ix86_vec_cost (mode, cost->mulss * nmults @@ -21656,7 +22036,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, /* Use vpbroadcast. */ extra = cost->sse_op; else - extra = cost->sse_load[2]; + extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; if (constant_op1) { @@ -21687,7 +22067,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, shift with one insn set the cost to prefer paddb. 
*/ if (constant_op1) { - extra = cost->sse_load[2]; + extra = COSTS_N_INSNS (cost->sse_load[2]) / 2; return ix86_vec_cost (mode, cost->sse_op) + extra; } else @@ -21702,7 +22082,9 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, /* Use vpbroadcast. */ extra = cost->sse_op; else - extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; + extra = COSTS_N_INSNS (mode == V16QImode + ? cost->sse_load[2] + : cost->sse_load[3]) / 2; if (constant_op1) { @@ -21810,6 +22192,34 @@ ix86_insn_cost (rtx_insn *insn, bool speed) return insn_cost + pattern_cost (PATTERN (insn), speed); } +/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */ + +static int +vec_fp_conversion_cost (const struct processor_costs *cost, int size) +{ + if (size < 128) + return cost->cvtss2sd; + else if (size < 256) + { + if (TARGET_SSE_SPLIT_REGS) + return cost->cvtss2sd * size / 64; + return cost->cvtss2sd; + } + if (size < 512) + return cost->vcvtps2pd256; + else + return cost->vcvtps2pd512; +} + +/* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */ + +static bool +unspec_pcmp_p (rtx x) +{ + return GET_CODE (x) == UNSPEC + && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -21827,9 +22237,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* Handling different vternlog variants. */ if ((GET_MODE_SIZE (mode) == 64 - ? (TARGET_AVX512F && TARGET_EVEX512) + ? TARGET_AVX512F : (TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256))) && GET_MODE_SIZE (mode) >= 16 && outer_code_i == SET && ternlog_operand (x, mode)) @@ -22178,8 +22588,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (ior (not ...) ...) can be a single insn in AVX512. */ if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22270,8 +22679,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* (and (not ...) (not ...)) can be a single insn in AVX512. */ if (GET_CODE (right) == NOT && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22341,8 +22749,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (not (xor ...)) can be a single insn in AVX512. */ if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F - && ((TARGET_EVEX512 - && GET_MODE_SIZE (mode) == 64) + && (GET_MODE_SIZE (mode) == 64 || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -22473,17 +22880,39 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case FLOAT_EXTEND: + /* x87 represents all values extended to 80bit. 
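+	 Extending on the x87 stack therefore generates no code, while
+	 for SSE math the cost comes from the cvtss2sd/vcvtps2pd table
+	 entries scaled to the operand width (vec_fp_conversion_cost).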
*/ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = 0; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; case FLOAT_TRUNCATE: if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) *total = cost->fadd; else - *total = ix86_vec_cost (mode, cost->addss); + *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); + return false; + case FLOAT: + case UNSIGNED_FLOAT: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtpi2ps); + else + *total = cost->cvtsi2ss; + return false; + + case FIX: + case UNSIGNED_FIX: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtps2pi); + else + *total = cost->cvtss2si; return false; case ABS: @@ -22544,13 +22973,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } return false; - case VEC_SELECT: case VEC_CONCAT: /* ??? Assume all of these vector manipulation patterns are recognizable. In which case they all pretty much have the - same cost. */ + same cost. + ??? We should still recruse when computing cost. */ *total = cost->sse_op; return true; + + case VEC_SELECT: + /* Special case extracting lower part from the vector. + This by itself needs to code and most of SSE/AVX instructions have + packed and single forms where the single form may be represented + by such VEC_SELECT. + + Use cost 1 (despite the fact that functionally equivalent SUBREG has + cost 0). Making VEC_SELECT completely free, for example instructs CSE + to forward propagate VEC_SELECT into + + (set (reg eax) (reg src)) + + which then prevents fwprop and combining. See i.e. + gcc.target/i386/pr91103-1.c. + + ??? rtvec_series_p test should be, for valid patterns, equivalent to + vec_series_lowpart_p but is not, since the latter calls + can_cange_mode_class on ALL_REGS and this return false since x87 does + not support subregs at all. */ + if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0)) + *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), + outer_code, opno, speed) + 1; + else + /* ??? We should still recruse when computing cost. */ + *total = cost->sse_op; + return true; + case VEC_DUPLICATE: *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), @@ -22563,13 +23020,87 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case VEC_MERGE: mask = XEXP (x, 2); + /* Scalar versions of SSE instructions may be represented as: + + (vec_merge (vec_duplicate (operation ....)) + (register or memory) + (const_int 1)) + + In this case vec_merge and vec_duplicate is for free. + Just recurse into operation and second operand. */ + if (mask == const1_rtx + && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } /* This is masked instruction, assume the same cost, as nonmasked variant. */ - if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) - *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); + else if (TARGET_AVX512F + && (register_operand (mask, GET_MODE (mask)) + /* Redunduant clean up of high bits for kmask with VL=2/4 + .i.e (vec_merge op0, op1, (and op3 15)). 
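+		    The AND keeps only the low 2 or 4 mask bits, which are
+		    the only bits the narrow operation reads anyway, so the
+		    cost is the same as with a plain register mask.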
*/ + || (GET_CODE (mask) == AND + && register_operand (XEXP (mask, 0), GET_MODE (mask)) + && CONST_INT_P (XEXP (mask, 1)) + && ((INTVAL (XEXP (mask, 1)) == 3 + && GET_MODE_NUNITS (mode) == 2) + || (INTVAL (XEXP (mask, 1)) == 15 + && GET_MODE_NUNITS (mode) == 4))))) + { + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* Combination of the two above: + + (vec_merge (vec_merge (vec_duplicate (operation ...)) + (register or memory) + (reg:QI mask)) + (register or memory) + (const_int 1)) + + i.e. avx512fp16_vcvtss2sh_mask. */ + else if (TARGET_AVX512F + && mask == const1_rtx + && GET_CODE (XEXP (x, 0)) == VEC_MERGE + && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE + && register_operand (XEXP (XEXP (x, 0), 2), + GET_MODE (XEXP (XEXP (x, 0), 2)))) + { + *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (XEXP (x, 0), 1), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* vcmp. */ + else if (unspec_pcmp_p (mask) + || (GET_CODE (mask) == NOT + && unspec_pcmp_p (XEXP (mask, 0)))) + { + rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; + rtx unsop0 = XVECEXP (uns, 0, 0); + /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) + cost the same as register. + This is used by avx_cmp<mode>3_ltint_not. */ + if (GET_CODE (unsop0) == SUBREG) + unsop0 = XEXP (unsop0, 0); + if (GET_CODE (unsop0) == NOT) + unsop0 = XEXP (unsop0, 0); + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) + + rtx_cost (unsop0, mode, UNSPEC, opno, speed) + + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) + + cost->sse_op; + return true; + } else *total = cost->sse_op; - return true; + return false; case MEM: /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. @@ -22586,7 +23117,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } /* An insn that accesses memory is slightly more expensive - than one that does not. */ + than one that does not. */ if (speed) { *total += 1; @@ -22827,7 +23358,9 @@ x86_this_parameter (tree function) { const int *parm_regs; - if (ix86_function_type_abi (type) == MS_ABI) + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type))) + parm_regs = x86_64_preserve_none_int_parameter_registers; + else if (ix86_function_type_abi (type) == MS_ABI) parm_regs = x86_64_ms_abi_int_parameter_registers; else parm_regs = x86_64_int_parameter_registers; @@ -23153,13 +23686,21 @@ x86_field_alignment (tree type, int computed) /* Print call to TARGET to FILE. 
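   LABEL is emitted before the instruction; it is the local label "1:"
   when the call will be referenced from an __mcount_loc-style section
   and the empty string otherwise.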
*/ static void -x86_print_call_or_nop (FILE *file, const char *target) +x86_print_call_or_nop (FILE *file, const char *target, + const char *label) { if (flag_nop_mcount || !strcmp (target, "nop")) /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ - fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); + fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n", + label); + else if (!TARGET_PECOFF && flag_pic) + { + gcc_assert (flag_plt); + + fprintf (file, "%s\tcall\t%s@PLT\n", label, target); + } else - fprintf (file, "1:\tcall\t%s\n", target); + fprintf (file, "%s\tcall\t%s\n", label, target); } static bool @@ -23244,6 +23785,13 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) const char *mcount_name = MCOUNT_NAME; + bool fentry_section_p + = (flag_record_mcount + || lookup_attribute ("fentry_section", + DECL_ATTRIBUTES (current_function_decl))); + + const char *label = fentry_section_p ? "1:" : ""; + if (current_fentry_name (&mcount_name)) ; else if (fentry_name) @@ -23279,11 +23827,12 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) reg = legacy_reg; } if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n" - "\tcall\t%s\n", reg, mcount_name, reg); + fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n" + "\tcall\t%s\n", label, reg, mcount_name, + reg); else - fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n", - mcount_name, reg, reg); + fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n", + label, mcount_name, reg, reg); break; case CM_LARGE_PIC: #ifdef NO_PROFILE_COUNTERS @@ -23321,24 +23870,24 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) break; case CM_SMALL_PIC: case CM_MEDIUM_PIC: - if (!ix86_direct_extern_access) + if (!flag_plt) { if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", - mcount_name); + fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n", + label, mcount_name); else - fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", - mcount_name); + fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n", + label, mcount_name); break; } /* fall through */ default: - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); break; } } else - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); } else if (flag_pic) { @@ -23352,10 +23901,14 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - if (ASSEMBLER_DIALECT == ASM_INTEL) - fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name); + if (flag_plt) + x86_print_call_or_nop (file, mcount_name, label); + else if (ASSEMBLER_DIALECT == ASM_INTEL) + fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n", + label, mcount_name); else - fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); + fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n", + label, mcount_name); } else { @@ -23368,12 +23921,10 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n", LPREFIX, labelno); #endif - x86_print_call_or_nop (file, mcount_name); + x86_print_call_or_nop (file, mcount_name, label); } - if (flag_record_mcount - || lookup_attribute ("fentry_section", - DECL_ATTRIBUTES (current_function_decl))) + if (fentry_section_p) { const char *sname = "__mcount_loc"; @@ -24132,7 +24683,7 @@ ix86_vector_mode_supported_p (machine_mode mode) return 
true; if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) return true; - if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) + if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) return true; if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode)) @@ -24380,8 +24931,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, } } - rtx_insn *seq = get_insns (); - end_sequence (); + rtx_insn *seq = end_sequence (); if (saw_asm_flag) return seq; @@ -24669,7 +25219,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, switch (type_of_cost) { case scalar_stmt: - return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + return fp ? ix86_cost->addss : COSTS_N_INSNS (1); case scalar_load: /* load/store costs are relative to register move which is 2. Recompute @@ -24740,7 +25290,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return ix86_cost->cond_not_taken_branch_cost; case vec_perm: + return ix86_vec_cost (mode, ix86_cost->sse_op); + case vec_promote_demote: + if (fp) + return vec_fp_conversion_cost (ix86_tune_cost, mode); return ix86_vec_cost (mode, ix86_cost->sse_op); case vec_construct: @@ -24753,12 +25307,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, /* One vinserti128 for combining two SSE vectors for AVX256. */ else if (GET_MODE_BITSIZE (mode) == 256) return ((n - 2) * ix86_cost->sse_op - + ix86_vec_cost (mode, ix86_cost->addss)); + + ix86_vec_cost (mode, ix86_cost->sse_op)); /* One vinserti64x4 and two vinserti128 for combining SSE and AVX256 vectors to AVX512. */ else if (GET_MODE_BITSIZE (mode) == 512) - return ((n - 4) * ix86_cost->sse_op - + 3 * ix86_vec_cost (mode, ix86_cost->addss)); + { + machine_mode half_mode + = mode_for_vector (GET_MODE_INNER (mode), + GET_MODE_NUNITS (mode) / 2).require (); + return ((n - 4) * ix86_cost->sse_op + + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op) + + ix86_vec_cost (mode, ix86_cost->sse_op)); + } gcc_unreachable (); } @@ -24926,7 +25486,7 @@ ix86_preferred_simd_mode (scalar_mode mode) switch (mode) { case E_QImode: - if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) return V64QImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V32QImode; @@ -24934,7 +25494,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V16QImode; case E_HImode: - if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) return V32HImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V16HImode; @@ -24942,7 +25502,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V8HImode; case E_SImode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SImode; @@ -24950,7 +25510,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SImode; case E_DImode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V8DImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DImode; @@ -24964,16 +25524,15 @@ ix86_preferred_simd_mode (scalar_mode mode) { if (TARGET_PREFER_AVX128) return V8HFmode; - else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512) + else if (TARGET_PREFER_AVX256) return V16HFmode; } - if (TARGET_EVEX512) - return V32HFmode; + return V32HFmode; } return word_mode; case E_BFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && 
!TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V32BFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V16BFmode; @@ -24981,7 +25540,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V8BFmode; case E_SFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V16SFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SFmode; @@ -24989,7 +25548,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SFmode; case E_DFmode: - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) return V8DFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DFmode; @@ -25009,13 +25568,13 @@ ix86_preferred_simd_mode (scalar_mode mode) static unsigned int ix86_autovectorize_vector_modes (vector_modes *modes, bool all) { - if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) { modes->safe_push (V64QImode); modes->safe_push (V32QImode); modes->safe_push (V16QImode); } - else if (TARGET_AVX512F && TARGET_EVEX512 && all) + else if (TARGET_AVX512F && all) { modes->safe_push (V32QImode); modes->safe_push (V16QImode); @@ -25053,7 +25612,7 @@ ix86_get_mask_mode (machine_mode data_mode) unsigned elem_size = vector_size / nunits; /* Scalar mask case. */ - if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) + if ((TARGET_AVX512F && vector_size == 64) || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)) /* AVX512FP16 only supports vector comparison to kmask for _Float16. */ @@ -25239,6 +25798,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, if (scalar_p) mode = TYPE_MODE (TREE_TYPE (vectype)); } + /* When we are costing a scalar stmt use the scalar stmt to get at the + type of the operation. */ + else if (scalar_p && stmt_info) + if (tree lhs = gimple_get_lhs (stmt_info->stmt)) + { + fp = FLOAT_TYPE_P (TREE_TYPE (lhs)); + mode = TYPE_MODE (TREE_TYPE (lhs)); + } if ((kind == vector_stmt || kind == scalar_stmt) && stmt_info @@ -25261,7 +25828,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; else - stmt_cost = ix86_cost->add; + stmt_cost = ix86_cost->add; } else stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss @@ -25316,7 +25883,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (subcode == RSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op1))) ? ASHIFTRT : LSHIFTRT, mode, - TREE_CODE (op2) == INTEGER_CST, + TREE_CODE (op2) == INTEGER_CST, cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1, false, false, NULL, NULL); @@ -25325,27 +25892,174 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case NOP_EXPR: /* Only sign-conversions are free. */ if (tree_nop_conversion_p - (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; + else if (fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + break; + + case FLOAT_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtsi2ss; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. 
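+	       Approximate the int-to-float conversion with the cost of
+	       an x87 add for now.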
*/ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + break; + + case FIX_TRUNC_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtss2si; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + break; + + case COND_EXPR: + { + /* SSE2 conditinal move sequence is: + pcmpgtd %xmm5, %xmm0 (accounted separately) + pand %xmm0, %xmm2 + pandn %xmm1, %xmm0 + por %xmm2, %xmm0 + while SSE4 uses cmp + blend + and AVX512 masked moves. + + The condition is accounted separately since we usually have + p = a < b + c = p ? x : y + and we will account first statement as setcc. Exception is when + p is loaded from memory as bool and then we will not acocunt + the compare, but there is no way to check for this. */ + + int ninsns = TARGET_SSE4_1 ? 1 : 3; + + /* If one of parameters is 0 or -1 the sequence will be simplified: + (if_true & mask) | (if_false & ~mask) -> if_true & mask */ + if (ninsns > 1 + && (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || zerop (gimple_assign_rhs3 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs3 (stmt_info->stmt)))) + ninsns = 1; + + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ninsns * ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); + else + /* compare (accounted separately) + cmov. */ + stmt_cost = ix86_cost->add; + } break; - case BIT_IOR_EXPR: - case ABS_EXPR: - case ABSU_EXPR: case MIN_EXPR: case MAX_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else + /* minss */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vpmin was introduced in SSE3. + SSE2 needs pcmpgtd + pand + pandn + pxor. + If one of parameters is 0 or -1 the sequence is simplified + to pcmpgtd + pand. */ + if (!TARGET_SSSE3) + { + if (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt))) + stmt_cost *= 2; + else + stmt_cost *= 4; + } + } + else + /* cmp + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case ABS_EXPR: + case ABSU_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode) + && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* fabs. */ + stmt_cost = ix86_cost->fabs; + else + /* andss of sign bit. */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vabs was introduced in SSE3. + SSE3 uses psrat + pxor + psub. */ + if (!TARGET_SSSE3) + stmt_cost *= 3; + } + else + /* neg + cmov. 
*/ + stmt_cost = ix86_cost->add * 2; + } + break; + + case BIT_IOR_EXPR: case BIT_XOR_EXPR: case BIT_AND_EXPR: case BIT_NOT_EXPR: - if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) - stmt_cost = ix86_cost->sse_op; - else if (VECTOR_MODE_P (mode)) + gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode) + && !X87_FLOAT_MODE_P (mode)); + if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); else stmt_cost = ix86_cost->add; break; + default: + if (truth_value_p (subcode)) + { + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* CMPccS? insructions are cheap, so use sse_op. While they + produce a mask which may need to be turned to 0/1 by and, + expect that this will be optimized away in a common case. */ + stmt_cost = ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* fcmp + setcc. */ + stmt_cost = ix86_cost->fadd + ix86_cost->add; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + else + /* setcc. */ + stmt_cost = ix86_cost->add; + break; + } break; } } @@ -25369,6 +26083,37 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } + if (kind == vec_promote_demote) + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + bool inner_fp = FLOAT_TYPE_P + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))); + + if (fp && inner_fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + else if (fp && !inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + else if (!fp && inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is + greater than inner size we will end up doing two conversions and + packing them. We always pack pairs; if the size difference is greater + it is split into multiple demote operations. */ + if (inner_size > outer_size) + stmt_cost = stmt_cost * 2 + + ix86_vec_cost (mode, ix86_cost->sse_op); + } + /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads (AGU and load ports). Try to account for this by scaling the @@ -25439,7 +26184,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else { m_num_gpr_needed[where]++; - stmt_cost += ix86_cost->sse_to_integer; + + int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + + /* For integer construction, the number of actual GPR -> XMM + moves will be somewhere between 0 and n. + We do not have very good idea about actual number, since + the source may be a constant, memory or a chain of + instructions that will be later converted by + scalar-to-vector pass. */ + if (kind == vec_construct + && GET_MODE_BITSIZE (mode) == 256) + cost *= 2; + else if (kind == vec_construct + && GET_MODE_BITSIZE (mode) == 512) + cost *= 3; + stmt_cost += cost; } } } @@ -25531,14 +26291,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both a AVX2 and a SSE epilogue for AVX512 vectorized loops. 
*/ if (loop_vinfo + && LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32 && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES]) - { - if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64) - m_suggested_epilogue_mode = V32QImode; - else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32) - m_suggested_epilogue_mode = V16QImode; - } + m_suggested_epilogue_mode = V16QImode; /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger enable a 64bit SSE epilogue. */ if (loop_vinfo @@ -25547,6 +26303,65 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16) m_suggested_epilogue_mode = V8QImode; + /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use + a masked epilogue if that doesn't seem detrimental. */ + if (loop_vinfo + && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2 + && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES] + && !OPTION_SET_P (param_vect_partial_vector_usage)) + { + bool avoid = false; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) + { + unsigned int peel_niter + = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + peel_niter += 1; + /* When we know the number of scalar iterations of the epilogue, + avoid masking when a single vector epilog iteration handles + it in full. */ + if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter) + % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())) + avoid = true; + } + if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo)))) + for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo)) + { + if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + ; + else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + ; + else + { + int loop_depth + = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num, + DDR_LOOP_NEST (ddr)); + if (DDR_NUM_DIST_VECTS (ddr) == 1 + && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0) + { + /* Avoid the case when there's an outer loop that might + traverse a multi-dimensional array with the inner + loop just executing the masked epilogue with a + read-write where the next outer iteration might + read from the masked part of the previous write, + 'n' filling half a vector. + for (j = 0; j < m; ++j) + for (i = 0; i < n; ++i) + a[j][i] = c * a[j][i]; */ + avoid = true; + break; + } + } + } + if (!avoid) + { + m_suggested_epilogue_mode = loop_vinfo->vector_mode; + m_masked_epilogue = 1; + } + } + vector_costs::finish_cost (scalar_costs); } @@ -25666,7 +26481,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, { /* If the function isn't exported, we can pick up just one ISA for the clones. */ - if (TARGET_AVX512F && TARGET_EVEX512) + if (TARGET_AVX512F) clonei->vecsize_mangle = 'e'; else if (TARGET_AVX2) clonei->vecsize_mangle = 'd'; @@ -25758,17 +26573,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode) return -1; if (!TARGET_AVX) return 0; - return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1; + return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; case 'c': if (!TARGET_AVX) return -1; - return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0; + return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; case 'd': if (!TARGET_AVX2) return -1; - return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0; + return TARGET_AVX512F ? 
1 : 0; case 'e': - if (!TARGET_AVX512F || !TARGET_EVEX512) + if (!TARGET_AVX512F) return -1; return 0; default: @@ -27440,6 +28255,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn) #undef TARGET_DOCUMENTATION_NAME #define TARGET_DOCUMENTATION_NAME "x86" +/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ +sbitmap +ix86_get_separate_components (void) +{ + HOST_WIDE_INT offset, to_allocate; + sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); + bitmap_clear (components); + struct machine_function *m = cfun->machine; + + offset = m->frame.stack_pointer_offset; + to_allocate = offset - m->frame.sse_reg_save_offset; + + /* Shrink wrap separate uses MOV, which means APX PPX cannot be used. + Experiments show that APX PPX can speed up the prologue. If the function + does not exit early during actual execution, then using APX PPX is faster. + If the function always exits early during actual execution, then shrink + wrap separate reduces the number of MOV (PUSH/POP) instructions actually + executed, thus speeding up execution. + foo: + movl $1, %eax + testq %rdi, %rdi + jne.L60 + ret ---> early return. + .L60: + subq $88, %rsp ---> belong to prologue. + xorl %eax, %eax + movq %rbx, 40 (%rsp) ---> belong to prologue. + movq 8 (%rdi), %rbx + movq %rbp, 48 (%rsp) ---> belong to prologue. + movq %rdi, %rbp + testq %rbx, %rbx + jne.L61 + movq 40 (%rsp), %rbx + movq 48 (%rsp), %rbp + addq $88, %rsp + ret + .L61: + movq %r12, 56 (%rsp) ---> belong to prologue. + movq %r13, 64 (%rsp) ---> belong to prologue. + movq %r14, 72 (%rsp) ---> belong to prologue. + ... ... + + Disable shrink wrap separate when PPX is enabled. */ + if ((TARGET_APX_PPX && !crtl->calls_eh_return) + || cfun->machine->func_type != TYPE_NORMAL + || TARGET_SEH + || crtl->stack_realign_needed + || m->call_ms2sysv) + return components; + + /* Since shrink wrapping separate uses MOV instead of PUSH/POP. + Disable shrink wrap separate when MOV is prohibited. */ + if (save_regs_using_push_pop (to_allocate)) + return components; + + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + /* Skip registers with large offsets, where a pseudo may be needed. */ + if (IN_RANGE (offset, -0x8000, 0x7fff)) + bitmap_set_bit (components, regno); + offset += UNITS_PER_WORD; + } + + /* Don't mess with the following registers. */ + if (frame_pointer_needed) + bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + + if (crtl->drap_reg) + bitmap_clear_bit (components, REGNO (crtl->drap_reg)); + + if (pic_offset_table_rtx) + bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */ +sbitmap +ix86_components_for_bb (basic_block bb) +{ + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; + + sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER); + bitmap_clear (components); + + function_abi_aggregator callee_abis; + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + if (CALL_P (insn)) + callee_abis.note_callee_abi (insn_callee_abi (insn)); + HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); + + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. 
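+     Also mark registers that a call in this block clobbers even though
+     the current function's ABI treats them as call-saved
+     (extra_caller_saves), so this block counts as needing their save
+     and restore.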
*/ + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (!fixed_regs[regno] + && (TEST_HARD_REG_BIT (extra_caller_saves, regno) + || bitmap_bit_p (in, regno) + || bitmap_bit_p (gen, regno) + || bitmap_bit_p (kill, regno))) + bitmap_set_bit (components, regno); + + return components; +} + +/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ +void +ix86_disqualify_components (sbitmap, edge, sbitmap, bool) +{ + /* Nothing to do for x86. */ +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ +void +ix86_emit_prologue_components (sbitmap components) +{ + HOST_WIDE_INT cfa_offset; + struct machine_function *m = cfun->machine; + + cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset + - m->frame.stack_pointer_offset; + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ +void +ix86_emit_epilogue_components (sbitmap components) +{ + HOST_WIDE_INT cfa_offset; + struct machine_function *m = cfun->machine; + cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset + - m->frame.stack_pointer_offset; + + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) + { + if (bitmap_bit_p (components, regno)) + { + rtx reg = gen_rtx_REG (word_mode, regno); + rtx mem; + rtx_insn *insn; + + mem = choose_baseaddr (cfa_offset, NULL); + mem = gen_frame_mem (word_mode, mem); + insn = emit_move_insn (reg, mem); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, reg); + } + cfa_offset -= UNITS_PER_WORD; + } +} + +/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ +void +ix86_set_handled_components (sbitmap components) +{ + for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (bitmap_bit_p (components, regno)) + { + cfun->machine->reg_is_wrapped_separately[regno] = true; + cfun->machine->use_fast_prologue_epilogue = true; + cfun->machine->frame.save_regs_using_mov = true; + } +} + +#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS +#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components +#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB +#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb +#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS +#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components +#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \ + ix86_emit_prologue_components +#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS +#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \ + ix86_emit_epilogue_components +#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS +#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8507243..3f7ad68 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -179,6 +179,7 @@ struct processor_costs { const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */ zmm_move; const int sse_to_integer; /* cost of moving SSE register to integer. */ + const int integer_to_sse; /* cost of moving integer register to SSE. 
*/ const int gather_static, gather_per_elt; /* Cost of gather load is computed as static + per_item * nelts. */ const int scatter_static, scatter_per_elt; /* Cost of gather store is @@ -207,6 +208,16 @@ struct processor_costs { const int divsd; /* cost of DIVSD instructions. */ const int sqrtss; /* cost of SQRTSS instructions. */ const int sqrtsd; /* cost of SQRTSD instructions. */ + const int cvtss2sd; /* cost SSE FP conversions, + such as CVTSS2SD. */ + const int vcvtps2pd256; /* cost 256bit packed FP conversions, + such as VCVTPD2PS with larger reg in ymm. */ + const int vcvtps2pd512; /* cost 512bit packed FP conversions, + such as VCVTPD2PS with larger reg in zmm. */ + const int cvtsi2ss; /* cost of CVTSI2SS instruction. */ + const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */ + const int cvtpi2ps; /* cost of CVTPI2PS instruction. */ + const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp @@ -479,7 +490,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_SSE_MOVCC_USE_BLENDV \ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV] #define TARGET_ALIGN_TIGHT_LOOPS \ - ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS] + ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS] +#define TARGET_SSE_REDUCTION_PREFER_PSHUF \ + ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF] /* Feature tests against the various architecture variations. */ @@ -525,6 +538,7 @@ extern unsigned char ix86_prefetch_sse; #define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2) #define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS) #define TARGET_SUN_TLS 0 +#define TARGET_WIN32_TLS 0 #ifndef TARGET_64BIT_DEFAULT #define TARGET_64BIT_DEFAULT 0 @@ -804,7 +818,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */ #define BIGGEST_ALIGNMENT \ - (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \ + (TARGET_IAMCU ? 32 : (TARGET_AVX512F \ ? 512 : (TARGET_AVX ? 256 : 128))) /* Maximum stack alignment. */ @@ -1682,6 +1696,8 @@ typedef struct ix86_args { int stdarg; /* Set to 1 if function is stdarg. */ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. */ + bool preserve_none_abi; /* Set to true if the preserve_none ABI is + used. */ tree decl; /* Callee decl. */ } CUMULATIVE_ARGS; @@ -1883,7 +1899,7 @@ typedef struct ix86_args { MOVE_MAX_PIECES defaults to MOVE_MAX. */ #define MOVE_MAX \ - ((TARGET_AVX512F && TARGET_EVEX512\ + ((TARGET_AVX512F \ && (ix86_move_max == PVW_AVX512 \ || ix86_store_max == PVW_AVX512)) \ ? 64 \ @@ -1902,7 +1918,7 @@ typedef struct ix86_args { store_by_pieces of 16/32/64 bytes. */ #define STORE_MAX_PIECES \ (TARGET_INTER_UNIT_MOVES_TO_VEC \ - ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \ + ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \ ? 64 \ : ((TARGET_AVX \ && ix86_store_max >= PVW_AVX256) \ @@ -2255,6 +2271,13 @@ extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; } while (0) #endif +/* In Intel syntax, we have to quote user-defined labels that would + match (unprefixed) registers or operators. 
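+   For example, a global named "eax" or "offset" must be quoted so the
+   assembler does not read it as a register or keyword.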
*/ + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + ix86_asm_output_labelref ((STREAM), user_label_prefix, (NAME)) + /* Under some conditions we need jump tables in the text section, because the assembler cannot handle label differences between sections. */ @@ -2396,13 +2419,13 @@ constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX; constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU - | PTA_CLWB | PTA_EVEX512; + | PTA_CLWB; constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512 | PTA_AVX512VNNI; constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16; constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU - | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_EVEX512; + | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA; constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG | PTA_RDPID | PTA_AVX512VPOPCNTDQ; @@ -2425,14 +2448,16 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID | PTA_SGX | PTA_PTWRITE; constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG; -constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX +constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB + | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT | PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; -constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA - | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR; +constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE + | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD + | PTA_ENQCMD | PTA_UINTR; constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16 - | PTA_PREFETCHI; + | PTA_PREFETCHI | PTA_AVX10_1; constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS | PTA_AMX_COMPLEX; constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST; @@ -2444,16 +2469,11 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST | PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR | PTA_PREFETCHI; constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI; -constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA - | PTA_GFNI | PTA_VAES | PTA_VPCLMULQDQ | PTA_RDPID | PTA_PCONFIG - | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD - | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK - | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI - | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256 - | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 - | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 - | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 - | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; +constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D + | PTA_AVXIFMA | 
PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8 + | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 + | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE + | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 @@ -2480,7 +2500,7 @@ constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ | PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL | PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI - | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512; + | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ; constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI; @@ -2782,11 +2802,13 @@ enum call_saved_registers_type or "no_caller_saved_registers" attribute. */ TYPE_NO_CALLER_SAVED_REGISTERS, /* The current function is a function specified with the - "no_callee_saved_registers" attribute. */ + "no_callee_saved_registers" attribute or a function specified with + the "noreturn" attribute when compiled with + "-mnoreturn-no-callee-saved-registers". */ TYPE_NO_CALLEE_SAVED_REGISTERS, - /* The current function is a function specified with the "noreturn" - attribute. */ - TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP, + /* The current function is a function specified with the + "preserve_none" attribute. */ + TYPE_PRESERVE_NONE, }; enum queued_insn_type @@ -2805,6 +2827,10 @@ struct GTY(()) machine_function { /* Cached initial frame layout for the current function. */ struct ix86_frame frame; + /* The components already handled by separate shrink-wrapping, which should + not be considered by the prologue and epilogue. */ + bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER]; + /* For -fsplit-stack support: A stack local which holds a pointer to the stack arguments for a function with a variable number of arguments. This is set at the start of the function and is used @@ -2859,7 +2885,7 @@ struct GTY(()) machine_function { ENUM_BITFIELD(indirect_branch) function_return_type : 3; /* Call saved registers type. */ - ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 2; + ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 3; /* If true, there is register available for argument passing. This is used only in ix86_function_ok_for_sibcall by 32-bit to determine @@ -2904,6 +2930,9 @@ struct GTY(()) machine_function { /* True if inline asm with redzone clobber has been seen. */ BOOL_BITFIELD asm_redzone_clobber_seen : 1; + /* True if this is a recursive function. */ + BOOL_BITFIELD recursive_function : 1; + /* The largest alignment, in bytes, of stack slot actually used. */ unsigned int max_used_stack_alignment; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..83c438b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -58,10 +58,11 @@ ;; H -- print a memory address offset by 8; used for sse high-parts ;; K -- print HLE lock prefix ;; Y -- print condition for XOP pcom* instruction. +;; v -- print segment override prefix ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. 
-;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode +;; ^ -- print addr32 prefix if Pmode != word_mode ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required. (define_c_enum "unspec" [ @@ -79,6 +80,7 @@ UNSPEC_MACHOPIC_OFFSET UNSPEC_PCREL UNSPEC_SIZEOF + UNSPEC_SECREL32 ;; Prologue support UNSPEC_STACK_ALLOC @@ -579,12 +581,11 @@ (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, - avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, - noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, - noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, - avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, - avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, - vaes_avx512vl,noapx_nf,avx10_2" + avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, + avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, + avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, + avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl, + avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. @@ -954,12 +955,8 @@ (eq_attr "isa" "fma_or_avx512vl") (symbol_ref "TARGET_FMA || TARGET_AVX512VL") (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") - (eq_attr "isa" "avx512f_512") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512") (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW") - (eq_attr "isa" "avx512bw_512") - (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512") (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ") (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") @@ -1495,7 +1492,7 @@ [(reg:CC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 3)) (pc)))] - "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256" + "TARGET_AVX512F && !TARGET_PREFER_AVX256" { ix86_expand_branch (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); @@ -1602,6 +1599,20 @@ [(set_attr "type" "icmp") (set_attr "mode" "<MODE>")]) +(define_insn "*cmp<mode>_plus_1" + [(set (reg FLAGS_REG) + (compare + (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m") + (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" +{ + operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode); + return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "icmp") + (set_attr "mode" "<MODE>")]) + (define_insn "*cmpqi_ext<mode>_1" [(set (reg FLAGS_REG) (compare @@ -2374,7 +2385,7 @@ (define_expand "movxi" [(set (match_operand:XI 0 "nonimmediate_operand") (match_operand:XI 1 "general_operand"))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_expand_vector_move (XImode, operands); DONE;") (define_expand "movoi" @@ -2427,22 +2438,32 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) -(define_insn "*mov<mode>_and" +;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem" +;; otherwise. 
+(define_insn_and_split "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "and{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) -(define_insn "*mov<mode>_or" +;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem" +;; otherwise. +(define_insn_and_split "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) @@ -2450,7 +2471,7 @@ (define_insn "*movxi_internal_avx512f" [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m") (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && (register_operand (operands[0], XImode) || register_operand (operands[1], XImode))" { @@ -2947,6 +2968,7 @@ (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx + && operands[1] != constm1_rtx && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" @@ -4414,7 +4436,7 @@ (eq_attr "alternative" "11") (const_string "DI") (eq_attr "alternative" "5") - (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512") + (cond [(and (match_test "TARGET_AVX512F") (not (match_test "TARGET_PREFER_AVX256"))) (const_string "V16SF") (match_test "TARGET_AVX") @@ -5482,7 +5504,7 @@ (set_attr "memory" "none") (set (attr "enabled") (if_then_else (eq_attr "alternative" "2") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512 + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL && !TARGET_PREFER_AVX256") (const_string "*")))]) @@ -5704,7 +5726,7 @@ /* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN quietly, and it always round to even. - flag_unsafte_math_optimization is needed for psrld. + flag_unsafe_math_optimization is needed for psrld. If we don't expect qNaNs nor sNaNs and can assume rounding to nearest, we can expand the conversion inline as (fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */ @@ -8708,6 +8730,34 @@ (set (match_dup 1) (minus:SWI (match_dup 1) (match_dup 0)))])]) +;; Under APX NDD, 'sub reg, mem, reg' is valid. 
+;; New format for +;; mov reg0, mem1 +;; sub reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sub mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI 0 "general_reg_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWI 2 "memory_operand") + (match_dup 0))) + (set (match_dup 0) + (minus:SWI (match_dup 2) (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 2) (match_dup 0))) + (set (match_dup 2) + (minus:SWI (match_dup 2) (match_dup 0)))])]) + ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into ;; subl $1, %eax; jnc .Lxx; (define_peephole2 @@ -9155,6 +9205,118 @@ (match_dup 1)) (match_dup 0)))])]) +;; Under APX NDD, 'adc reg, mem, reg' is valid. +;; +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem1, reg0 +;; to +;; mov reg0, mem2 +;; adc mem1, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 1) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 1) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 0)))])]) + +;; New format for +;; mov reg0, mem1 +;; adc reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; adc mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI48 2 "memory_operand")) + (match_dup 0))) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 0) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && 
peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> + (plus:SWI48 + (plus:SWI48 + (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0))) + (plus:<DWI> + (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (plus:SWI48 (plus:SWI48 (match_op_dup 5 + [(match_dup 3) (const_int 0)]) + (match_dup 2)) + (match_dup 0)))])]) + (define_peephole2 [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -9635,6 +9797,52 @@ [(match_dup 3) (const_int 0)])) (match_dup 0)))])]) +;; Under APX NDD, 'sbb reg, mem, reg' is valid. +;; +;; New format for +;; mov reg0, mem1 +;; sbb reg0, mem2, reg0 +;; mov mem2, reg0 +;; to +;; mov reg0, mem1 +;; sbb mem2, reg0 +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "memory_operand")) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand")) + (plus:<DWI> + (match_operator:<DWI> 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (zero_extend:<DWI> + (match_dup 0))))) + (set (match_dup 0) + (minus:SWI48 + (minus:SWI48 + (match_dup 2) + (match_operator:SWI48 5 "ix86_carry_flag_operator" + [(match_dup 3) (const_int 0)])) + (match_dup 0)))]) + (set (match_dup 2) (match_dup 0))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extend:<DWI> (match_dup 2)) + (plus:<DWI> (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (zero_extend:<DWI> (match_dup 0))))) + (set (match_dup 2) + (minus:SWI48 (minus:SWI48 (match_dup 2) + (match_op_dup 5 + [(match_dup 3) (const_int 0)])) + (match_dup 0)))])]) + (define_peephole2 [(set (match_operand:SWI48 6 "general_reg_operand") (match_operand:SWI48 7 "memory_operand")) @@ -14561,6 +14769,17 @@ (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0))) (set (match_dup 0) (neg:SWI (match_dup 0)))])]) +;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384) with APX_F +(define_peephole2 + [(parallel [(set (match_operand:SWI 0 "general_reg_operand") + (neg:SWI (match_operand:SWI 1 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))]) + (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))] + "TARGET_APX_NDD" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SWI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:SWI (match_dup 1)))])]) + ;; Special expand pattern to handle integer mode abs (define_expand "abs<mode>2" @@ -19894,7 +20113,7 @@ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. 
*/ - if (TARGET_64BIT || TARGET_VXWORKS_RTP) + if (TARGET_64BIT || TARGET_VXWORKS_VAROFF) { code = PLUS; op0 = operands[0]; @@ -20762,7 +20981,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" { - if (flag_pic && !TARGET_VXWORKS_RTP) + if (flag_pic && !TARGET_VXWORKS_GOTTPIC) ix86_pc_thunk_call_expanded = true; }) @@ -20783,7 +21002,7 @@ (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" { - if (flag_pic && !TARGET_VXWORKS_RTP) + if (flag_pic && !TARGET_VXWORKS_GOTTPIC) ix86_pc_thunk_call_expanded = true; }) @@ -21315,11 +21534,12 @@ (set_attr "mode" "SI")]) ; As bsr is undefined behavior on zero and for other input -; values it is in range 0 to 63, we can optimize away sign-extends. -(define_insn_and_split "*bsr_rex64_2" +; values it is in range 0 to 63, we can optimize away sign-extends +; or zero-extends. +(define_insn_and_split "*bsr_rex64<u>_2" [(set (match_operand:DI 0 "register_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21341,9 +21561,9 @@ operands[3] = lowpart_subreg (SImode, operands[2], DImode); }) -(define_insn_and_split "*bsr_2" +(define_insn_and_split "*bsr<u>_2" [(set (match_operand:DI 0 "register_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) @@ -21420,7 +21640,7 @@ (minus:DI (match_operand:DI 2 "const_int_operand") (xor:DI - (sign_extend:DI + (any_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) @@ -21450,7 +21670,7 @@ [(set (match_operand:DI 0 "register_operand") (minus:DI (match_operand:DI 2 "const_int_operand") - (sign_extend:DI + (any_extend:DI (xor:SI (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) @@ -22992,7 +23212,8 @@ (match_operand 3))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:P 4 "register_operand" "=D"))] "TARGET_64BIT" { if (!TARGET_X32) @@ -23009,7 +23230,7 @@ Use data16 prefix instead, which doesn't have this problem. 
*/ fputs ("\tdata16", asm_out_file); output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands); if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) fputs (ASM_SHORT "0x6666\n", asm_out_file); else @@ -23033,14 +23254,15 @@ (match_operand 4))) (unspec:DI [(match_operand 1 "tls_symbolic_operand") (reg:DI SP_REG)] - UNSPEC_TLS_GD)] + UNSPEC_TLS_GD) + (clobber (match_operand:DI 5 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[3]) == CONST && GET_CODE (XEXP (operands[3], 0)) == UNSPEC && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands); output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); return "call\t{*%%rax|rax}"; @@ -23056,7 +23278,8 @@ (const_int 0))) (unspec:P [(match_operand 1 "tls_symbolic_operand") (reg:P SP_REG)] - UNSPEC_TLS_GD)])] + UNSPEC_TLS_GD) + (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -23107,11 +23330,12 @@ (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "Bz")) (match_operand 2))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 3 "register_operand" "=D"))] "TARGET_64BIT" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands); if (TARGET_SUN_TLS) return "call\t%p1@plt"; if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT) @@ -23127,14 +23351,15 @@ (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") (match_operand:DI 2 "immediate_operand" "i"))) (match_operand 3))) - (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)] + (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:DI 4 "register_operand" "=D"))] "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF && GET_CODE (operands[2]) == CONST && GET_CODE (XEXP (operands[2], 0)) == UNSPEC && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" { output_asm_insn - ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands); output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); return "call\t{*%%rax|rax}"; @@ -23148,7 +23373,8 @@ (call:P (mem:QI (match_operand 1)) (const_int 0))) - (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])] + (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) + (clobber (match_operand:P 2 "register_operand"))])] "TARGET_64BIT" "ix86_tls_descriptor_calls_expanded_in_cfun = true;") @@ -25587,10 +25813,6 @@ (clobber (reg:CC FLAGS_REG))])] "" { - /* Can't use this for non-default address spaces. */ - if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))) - FAIL; - int piece_size = GET_MODE_SIZE (GET_MODE (operands[1])); /* If .md ever supports :P for Pmode, these can be directly @@ -25598,9 +25820,14 @@ operands[5] = plus_constant (Pmode, operands[0], piece_size); operands[6] = plus_constant (Pmode, operands[2], piece_size); - /* Can't use this if the user has appropriated esi or edi. 
*/ + /* Can't use this if the user has appropriated esi or edi, + * or if we have the destination in the non-default address space, + * since string insns cannot override the destination segment. */ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) - && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) + && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])) + && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])) + || Pmode == word_mode)) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -25635,8 +25862,15 @@ (const_int 8)))] "TARGET_64BIT && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsq" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsq"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "DI")]) @@ -25651,8 +25885,15 @@ (plus:P (match_dup 3) (const_int 4)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movs{l|d}" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movs{l|d}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "SI")]) @@ -25667,8 +25908,15 @@ (plus:P (match_dup 3) (const_int 2)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsw" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsw"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) @@ -25683,8 +25931,15 @@ (plus:P (match_dup 3) (const_int 1)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsb" + && ix86_check_movs (insn, 0)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1movsb"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set (attr "prefix_rex") @@ -25723,8 +25978,15 @@ (use (match_dup 5))] "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsq" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsq"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25743,8 +26005,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movs{l|d}" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movs{l|d}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25761,8 +26030,15 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsb" + && ix86_check_movs (insn, 3)" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^%v1rep{%;} movsb"; +} [(set_attr "type" "str") (set_attr "prefix_rep" 
"1") (set_attr "memory" "both") @@ -25844,7 +26120,8 @@ (unspec [(const_int 0)] UNSPEC_STOS)] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25858,7 +26135,8 @@ (const_int 4))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25872,7 +26150,8 @@ (const_int 2))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25886,7 +26165,8 @@ (const_int 1))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25922,7 +26202,8 @@ (use (match_dup 4))] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25940,7 +26221,8 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25957,7 +26239,8 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -26224,8 +26507,8 @@ (define_expand "mov<mode>cc" [(set (match_operand:SWIM 0 "register_operand") (if_then_else:SWIM (match_operand 1 "comparison_operator") - (match_operand:SWIM 2 "<general_operand>") - (match_operand:SWIM 3 "<general_operand>")))] + (match_operand:SWIM 2 "general_operand") + (match_operand:SWIM 3 "general_operand")))] "" "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") @@ -26592,8 +26875,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") @@ -27183,7 +27466,7 @@ (cond [(and (eq_attr "alternative" "0") (not (match_test "TARGET_OPT_AGU"))) (const_string "alu") - (match_operand:<MODE> 2 "const0_operand") + (match_operand 2 "const0_operand") (const_string "imov") ] (const_string "lea"))) @@ -27197,6 +27480,46 @@ (const_string "*"))) (set_attr "mode" 
"<MODE>")]) +(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (match_operand:P 1 "register_operand" "r") + (match_operand:P 2 "<nonmemory_operand>" "l<i>"))) + (clobber (mem:BLK (scratch)))] + "" +{ + if (get_attr_type (insn) == TYPE_IMOV) + return "mov{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + { + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}"; + } +} + [(set (attr "type") + (cond [(match_operand 2 "const0_operand") + (const_string "imov") + ] + (const_string "lea"))) + (set (attr "length_immediate") + (cond [(eq_attr "type" "imov") + (const_string "0") + ] + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +(define_peephole2 + [(parallel + [(set (match_operand:P 0 "register_operand") + (plus:P (match_dup 0) + (match_operand:P 1 "<nonmemory_operand>"))) + (clobber (mem:BLK (scratch)))])] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel + [(set (match_dup 0) + (plus:P (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])]) + (define_insn "@pro_epilogue_adjust_stack_sub_<mode>" [(set (match_operand:P 0 "register_operand" "=r") (minus:P (match_operand:P 1 "register_operand" "0") @@ -28144,6 +28467,41 @@ const0_rtx); }) +;; For APX NDD PLUS/MINUS/LOGIC +;; Like cmpelim optimized pattern. +;; Reduce an extra mov instruction like +;; decl (%rdi), %eax +;; mov %eax, (%rdi) +;; to +;; decl (%rdi) +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) + (compare (match_operator:SWI 2 "plusminuslogic_operator" + [(match_operand:SWI 0 "memory_operand") + (match_operand:SWI 1 "<nonmemory_operand>")]) + (const_int 0))) + (set (match_operand:SWI 3 "register_operand") (match_dup 2))]) + (set (match_dup 0) (match_dup 3))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && peep2_reg_dead_p (2, operands[3]) + && !reg_overlap_mentioned_p (operands[3], operands[0]) + && ix86_match_ccmode (peep2_next_insn (0), + (GET_CODE (operands[2]) == PLUS + || GET_CODE (operands[2]) == MINUS) + ? CCGOCmode : CCNOmode)" + [(parallel [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (match_dup 5))])] +{ + operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0)); + operands[5] + = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + copy_rtx (operands[0]), operands[1]); + operands[6] + = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), + const0_rtx); +}) + ;; Likewise for instances where we have a lea pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") @@ -28237,6 +28595,54 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movq (%rdi), %rax +;; xorq %rsi, %rax, %rdx +;; movb %rdx, (%rdi) +;; cmpb %rsi, %rax +;; jne +;; to +;; xorb %rsi, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI 0 "register_operand") + (match_operand:SWI 1 "memory_operand")) + (parallel [(set (match_operand:SWI 4 "register_operand") + (xor:SWI (match_operand:SWI 3 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_dup 4)) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI 5 "register_operand") + (match_operand:SWI 6 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && (rtx_equal_p (operands[0], operands[5]) + ? 
rtx_equal_p (operands[2], operands[6]) + : rtx_equal_p (operands[2], operands[5]) + && rtx_equal_p (operands[0], operands[6])) + && peep2_reg_dead_p (3, operands[4]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], QImode) + || any_QIreg_operand (operands[2], QImode))" + [(parallel [(set (match_dup 7) (match_dup 9)) + (set (match_dup 1) (match_dup 8))])] +{ + operands[7] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + operands[2]); + operands[9] + = gen_rtx_COMPARE (GET_MODE (operands[7]), + copy_rtx (operands[8]), + const0_rtx); +}) + (define_peephole2 [(set (match_operand:SWI12 0 "register_operand") (match_operand:SWI12 1 "memory_operand")) @@ -28480,6 +28886,58 @@ const0_rtx); }) +;; For APX NDD XOR +;; Reduce 2 mov and 1 cmp instruction. +;; from +;; movb (%rdi), %al +;; xorl %esi, %eax, %edx +;; movb %dl, (%rdi) +;; cmpb %sil, %al +;; jne +;; to +;; xorl %sil, (%rdi) +;; jne +(define_peephole2 + [(set (match_operand:SWI12 0 "register_operand") + (match_operand:SWI12 1 "memory_operand")) + (parallel [(set (match_operand:SI 4 "register_operand") + (xor:SI (match_operand:SI 3 "register_operand") + (match_operand:SI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 1) (match_operand:SWI12 5 "register_operand")) + (set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI12 6 "register_operand") + (match_operand:SWI12 7 "<nonmemory_operand>")))] + "TARGET_APX_NDD + && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) + && REGNO (operands[3]) == REGNO (operands[0]) + && REGNO (operands[5]) == REGNO (operands[4]) + && (rtx_equal_p (operands[0], operands[6]) + ? (REG_P (operands[2]) + ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7]) + : rtx_equal_p (operands[2], operands[7])) + : (rtx_equal_p (operands[0], operands[7]) + && REG_P (operands[2]) + && REGNO (operands[2]) == REGNO (operands[6]))) + && peep2_reg_dead_p (3, operands[5]) + && peep2_reg_dead_p (4, operands[0]) + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2]) + && (<MODE>mode != QImode + || immediate_operand (operands[2], SImode) + || any_QIreg_operand (operands[2], SImode))" + [(parallel [(set (match_dup 8) (match_dup 10)) + (set (match_dup 1) (match_dup 9))])] +{ + operands[8] = SET_DEST (PATTERN (peep2_next_insn (3))); + operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]), + gen_lowpart (<MODE>mode, operands[2])); + operands[10] + = gen_rtx_COMPARE (GET_MODE (operands[8]), + copy_rtx (operands[9]), + const0_rtx); +}) + ;; Attempt to optimize away memory stores of values the memory already ;; has. See PR79593. 
(define_peephole2 @@ -29082,6 +29540,23 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) +(define_expand "crc_rev<SWI124:mode>si4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SWI124 2 "nonimmediate_operand") + (match_operand:SI 3)] + "TARGET_CRC32" +{ + /* crc32 uses iSCSI polynomial */ + if (INTVAL (operands[3]) == 0x1EDC6F41) + emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2])); + else + expand_reversed_crc_table_based (operands[0], operands[1], operands[2], + operands[3], <SWI124:MODE>mode, + generate_reflecting_code_standard); + DONE; +}) + (define_insn "rdpmc" [(set (match_operand:DI 0 "register_operand" "=A") (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")] diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 27d34bd..c93c0b1 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -36,13 +36,6 @@ HOST_WIDE_INT ix86_isa_flags_explicit Variable HOST_WIDE_INT ix86_isa_flags2_explicit -; Indicate if AVX512 and AVX10.1 are explicitly set no. -Variable -int ix86_no_avx512_explicit = 0 - -Variable -int ix86_no_avx10_1_explicit = 0 - ; Additional target flags Variable int ix86_target_flags @@ -103,14 +96,6 @@ HOST_WIDE_INT x_ix86_isa_flags2_explicit TargetSave HOST_WIDE_INT x_ix86_isa_flags_explicit -;; which flags were passed by the user -TargetSave -HOST_WIDE_INT x_ix86_no_avx512_explicit - -;; which flags were passed by the user -TargetSave -HOST_WIDE_INT x_ix86_no_avx10_1_explicit - ;; whether -mtune was not specified TargetSave unsigned char tune_defaulted @@ -721,13 +706,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. msse4 -Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save +Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation. -mno-sse4 -Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save -Do not support SSE4.1 and SSE4.2 built-in functions and code generation. - msse5 Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed) ;; Deprecated @@ -1355,38 +1336,24 @@ mapx-inline-asm-use-gpr32 Target Var(ix86_apx_inline_asm_use_gpr32) Init(0) Enable GPR32 in inline asm when APX_F enabled. -mevex512 -Target Mask(ISA2_EVEX512) Var(ix86_isa_flags2) Save Warn(%<-mevex512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support 512 bit vector built-in functions and code generation. - musermsr Target Mask(ISA2_USER_MSR) Var(ix86_isa_flags2) Save Support USER_MSR built-in functions and code generation. -mavx10.1-256 -Target Mask(ISA2_AVX10_1_256) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-256 built-in functions and code generation. - mavx10.1 -Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported) -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-512 built-in functions and code generation. 
- -mavx10.1-512 -Target Alias(mavx10.1) +Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -and AVX10.1-512 built-in functions and code generation. +and AVX10.1 built-in functions and code generation. mavx10.2 Target Mask(ISA2_AVX10_2) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -AVX10.1-512 and AVX10.2 built-in functions and code generation. +AVX10.1 and AVX10.2 built-in functions and code generation. mamx-avx512 Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save -Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512, -AVX10.2 and AMX-AVX512 built-in functions and code generation. +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, +AVX10.1, AVX10.2 and AMX-AVX512 built-in functions and code generation. mamx-tf32 Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index 0d5a5a1..cce524c 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -590,21 +590,12 @@ UrlSuffix(gcc/x86-Options.html#index-mapxf) mapx-inline-asm-use-gpr32 UrlSuffix(gcc/x86-Options.html#index-mapx-inline-asm-use-gpr32) -mevex512 -UrlSuffix(gcc/x86-Options.html#index-mevex512) - musermsr UrlSuffix(gcc/x86-Options.html#index-musermsr) -mavx10.1-256 -UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) - mavx10.1 UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) -mavx10.1-512 -UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) - mavx10.2 UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2) diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index c30a4e0..b195fe5 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -148,24 +148,14 @@ #include <avx10_2mediaintrin.h> -#include <avx10_2-512mediaintrin.h> - #include <avx10_2convertintrin.h> -#include <avx10_2-512convertintrin.h> - #include <avx10_2bf16intrin.h> -#include <avx10_2-512bf16intrin.h> - #include <avx10_2satcvtintrin.h> -#include <avx10_2-512satcvtintrin.h> - #include <avx10_2minmaxintrin.h> -#include <avx10_2-512minmaxintrin.h> - #include <avx10_2copyintrin.h> #include <movrsintrin.h> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c..3afaf83 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -218,6 +218,7 @@ case UNSPEC_DTPOFF: case UNSPEC_GOTNTPOFF: case UNSPEC_NTPOFF: + case UNSPEC_SECREL32: return true; default: break; @@ -392,6 +393,23 @@ return false; }) +;; Return true if VALUE is a constant integer whose negation satisfies +;; x86_64_immediate_operand. +(define_predicate "x86_64_neg_const_int_operand" + (match_code "const_int") +{ + HOST_WIDE_INT val = -UINTVAL (op); + if (mode == DImode && trunc_int_for_mode (val, SImode) != val) + return false; + if (flag_cf_protection & CF_BRANCH) + { + unsigned HOST_WIDE_INT endbr = TARGET_64BIT ? 0xfa1e0ff3 : 0xfb1e0ff3; + if ((val & HOST_WIDE_INT_C (0xffffffff)) == endbr) + return false; + } + return true; +}) + ;; Return true if VALUE is a constant integer whose low and high words satisfy ;; x86_64_immediate_operand. (define_predicate "x86_64_hilo_int_operand" @@ -646,8 +664,9 @@ ;; same segment as the GOT. Unfortunately, the flexibility of linker ;; scripts means that we can't be sure of that in general, so assume ;; @GOTOFF is not valid on VxWorks, except with the large code model. 
+;; The comments above seem to apply only to VxWorks releases before 7. (define_predicate "gotoff_operand" - (and (ior (not (match_test "TARGET_VXWORKS_RTP")) + (and (ior (not (match_test "TARGET_VXWORKS_VAROFF")) (match_test "ix86_cmodel == CM_LARGE") (match_test "ix86_cmodel == CM_LARGE_PIC")) (match_operand 0 "local_symbolic_operand"))) @@ -1267,12 +1286,19 @@ (match_operand 0 "vector_memory_operand") (match_code "const_vector"))) +; Return true when OP is register_operand, vector_memory_operand, +; const_vector zero or const_vector all ones. +(define_predicate "vector_or_0_or_1s_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "vector_memory_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + (define_predicate "bcst_mem_operand" (and (match_code "vec_duplicate") (and (match_test "TARGET_AVX512F") (ior (match_test "TARGET_AVX512VL") - (and (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64") - (match_test "TARGET_EVEX512")))) + (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") (match_test "GET_MODE (XEXP (op, 0)) == GET_MODE_INNER (GET_MODE (op))") @@ -1333,6 +1359,12 @@ (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate or zero or all ones. +(define_predicate "nonimm_or_0_or_1s_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676..252ba07 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -279,63 +279,63 @@ ;; All vector modes including V?TImode, used in move patterns. (define_mode_iterator VMOVE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX") V1TI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; All AVX-512{F,VL} vector modes without HF. Supposed TARGET_AVX512F baseline. 
(define_mode_iterator V48_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator V48_256_512_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL")]) ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline. (define_mode_iterator V48H_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline. 
(define_mode_iterator VI12_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) (define_mode_iterator VI12HFBF_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") - (V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") + V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) (define_mode_iterator VI1_AVX512VL - [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) + [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) ;; All vector modes (define_mode_iterator V - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) ;; All 128bit vector modes (define_mode_iterator V_128 @@ -352,54 +352,44 @@ ;; All 512bit vector modes (define_mode_iterator V_512 - [(V64QI "TARGET_EVEX512") (V32HI "TARGET_EVEX512") - (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512") - (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")]) + [V64QI V32HI V16SI V8DI + V16SF V8DF V32HF V32BF]) ;; All 256bit and 512bit vector modes (define_mode_iterator V_256_512 [V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512F && TARGET_EVEX512") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") - (V32BF "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V32HF "TARGET_AVX512F") (V32BF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) ;; All vector float modes (define_mode_iterator VF - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) 
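The mode-iterator changes above drop the separate EVEX512 gate, so the 512-bit variants (V16SF, V8DF, V64QI, ...) are now conditioned on TARGET_AVX512F alone. A minimal sketch of the user-visible effect (an illustration, not part of the patch): a translation unit built with plain -mavx512f is expected to be able to use 512-bit float modes directly, with no separate -mevex512 option involved.

#include <immintrin.h>

/* V16SF addition; with this series, gating on AVX512F alone is enough
   for the 512-bit mode to be available.  */
__m512
add_ps512 (__m512 a, __m512 b)
{
  return _mm512_add_ps (a, b);
}

Compiled with gcc -O2 -mavx512f, this is expected to emit a zmm-register vaddps.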
(define_mode_iterator VF1_VF2_AVX512DQ - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512DQ && TARGET_EVEX512") + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL") (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")]) -(define_mode_iterator VF1_VF2_AVX10_2 - [(V16SF "TARGET_AVX10_2") V8SF V4SF - (V8DF "TARGET_AVX10_2") V4DF V2DF]) - (define_mode_iterator VFH - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_mode_iterator VF_BHSD - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") (V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") @@ -408,12 +398,12 @@ ;; 128-, 256- and 512-bit float vector modes for bitwise operations (define_mode_iterator VFB - [(V32BF "TARGET_AVX512F && TARGET_EVEX512") + [(V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") (V8BF "TARGET_SSE2") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) ;; 128- and 256-bit float vector modes @@ -430,44 +420,39 @@ ;; All SFmode vector float modes (define_mode_iterator VF1 - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF]) + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF]) (define_mode_iterator VF1_AVX2 - [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF]) + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF]) ;; 128- and 256-bit SF vector modes (define_mode_iterator VF1_128_256 [(V8SF "TARGET_AVX") V4SF]) (define_mode_iterator VF1_128_256VL - [(V8SF "TARGET_EVEX512") (V4SF "TARGET_AVX512VL")]) + [V8SF (V4SF "TARGET_AVX512VL")]) ;; All DFmode vector float modes (define_mode_iterator VF2 - [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) - -(define_mode_iterator VF2_AVX10_2 - [(V8DF "TARGET_AVX10_2") V4DF V2DF]) + [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; All DFmode & HFmode & BFmode vector float modes (define_mode_iterator VF2HB - [(V32BF "TARGET_AVX10_2") - (V16BF "TARGET_AVX10_2") - (V8BF "TARGET_AVX10_2") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") + (V8BF "TARGET_AVX10_2") (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF]) + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) ;; 128- and 256-bit DF vector modes (define_mode_iterator VF2_128_256 [(V4DF "TARGET_AVX") V2DF]) (define_mode_iterator VF2_512_256 - [(V8DF "TARGET_AVX512F && TARGET_EVEX512") V4DF]) + 
[(V8DF "TARGET_AVX512F") V4DF]) (define_mode_iterator VF2_512_256VL - [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")]) + [V8DF (V4DF "TARGET_AVX512VL")]) ;; All 128bit vector SF/DF modes (define_mode_iterator VF_128 @@ -484,116 +469,102 @@ ;; All 512bit vector float modes (define_mode_iterator VF_512 - [(V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) + [V16SF V8DF]) ;; All 512bit vector float modes for bitwise operations (define_mode_iterator VFB_512 - [(V32BF "TARGET_EVEX512") - (V32HF "TARGET_EVEX512") - (V16SF "TARGET_EVEX512") - (V8DF "TARGET_EVEX512")]) + [V32BF V32HF V16SF V8DF]) (define_mode_iterator V24F_128 [V4SF V8HF V8BF]) (define_mode_iterator VI48_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI1248_AVX512VLBW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") - (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) - -(define_mode_iterator VI1248_AVX10_2 - [(V64QI "TARGET_AVX10_2") V32QI V16QI - (V32HI "TARGET_AVX10_2") V16HI V8HI - (V16SI "TARGET_AVX10_2") V8SI V4SI - (V8DI "TARGET_AVX10_2") V4DI V2DI]) + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VF_AVX512VL - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VFH_AVX512VL - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + +(define_mode_iterator V48_AVX512VL_4 + [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + +(define_mode_iterator VI48_AVX512VL_4 + [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) -(define_mode_iterator VFH_AVX10_2 - [(V32HF "TARGET_AVX10_2") V16HF V8HF - (V16SF "TARGET_AVX10_2") V8SF V4SF - (V8DF "TARGET_AVX10_2") V4DF V2DF]) +(define_mode_iterator V8_AVX512VL_2 + [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VF2_AVX512VL - [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VF1_AVX512VL - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) (define_mode_iterator VF1_AVX512BW - [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF]) - -(define_mode_iterator 
VF1_AVX10_2 - [(V16SF "TARGET_AVX10_2") V8SF V4SF]) + [(V16SF "TARGET_AVX512BW") (V8SF "TARGET_AVX2") V4SF]) (define_mode_iterator VHFBF - [(V32HF "TARGET_EVEX512") V16HF V8HF - (V32BF "TARGET_EVEX512") V16BF V8BF]) + [V32HF V16HF V8HF V32BF V16BF V8BF]) (define_mode_iterator VHFBF_256 [V16HF V16BF]) (define_mode_iterator VHFBF_128 [V8HF V8BF]) (define_mode_iterator VHF_AVX512VL - [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) (define_mode_iterator VHFBF_AVX512VL - [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) - -(define_mode_iterator VHF_AVX10_2 - [(V32HF "TARGET_AVX10_2") V16HF V8HF]) + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) -(define_mode_iterator VBF_AVX10_2 - [(V32BF "TARGET_AVX10_2") V16BF V8BF]) +(define_mode_iterator VBF + [V32BF V16BF V8BF]) ;; All vector integer modes (define_mode_iterator VI - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI + [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI]) ;; All vector integer and HF modes (define_mode_iterator VIHFBF - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V8SI "TARGET_AVX") V4SI - (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF]) + [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI + (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF]) (define_mode_iterator VI_AVX2 - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI_AVX_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) ;; All QImode vector integer modes (define_mode_iterator VI1 @@ -611,56 +582,50 @@ (V8SI "TARGET_AVX") (V4DI 
"TARGET_AVX")]) (define_mode_iterator VI8 - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) - -(define_mode_iterator VI8_AVX10_2 - [(V8DI "TARGET_AVX10_2") V4DI V2DI]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) (define_mode_iterator VI8_FVL - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")]) + [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI8_AVX512VL - [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI8_256_512 - [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")]) + [V8DI (V4DI "TARGET_AVX512VL")]) (define_mode_iterator VI1_AVX2 [(V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512 - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI]) (define_mode_iterator VI1_AVX512VNNI - [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI]) + [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI1_AVX512VNNIBW - [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") + [(V64QI "TARGET_AVX512BW || TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI]) (define_mode_iterator VI12_256_512_AVX512VL - [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")]) + [V64QI (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL")]) (define_mode_iterator VI2_AVX2 [(V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX2_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX512F - [(V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI2_AVX512VNNIBW - [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512") + [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI") (V16HI "TARGET_AVX2") V8HI]) -(define_mode_iterator VI2_AVX10_2 - [(V32HI "TARGET_AVX10_2") V16HI V8HI]) - (define_mode_iterator VI4_AVX [(V8SI "TARGET_AVX") V4SI]) @@ -668,65 +633,64 @@ [(V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI4_AVX512F - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI]) + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI4_AVX512VL - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) (define_mode_iterator VI4_AVX10_2 [(V16SI "TARGET_AVX10_2") V8SI V4SI]) (define_mode_iterator VI48_AVX512F_AVX512VL - [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512") + [V4SI V8SI (V16SI "TARGET_AVX512F") (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V8DI "TARGET_AVX512F")]) (define_mode_iterator VI2_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI]) (define_mode_iterator VI2HFBF_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512") - (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") (V32HF "TARGET_EVEX512") - (V8BF "TARGET_AVX512VL") (V16BF 
"TARGET_AVX512VL") (V32BF "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI + (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") V32HF + (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") V32BF]) (define_mode_iterator VI2H_AVX512VL - [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512") - (V8SI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") - (V8DI "TARGET_EVEX512")]) + [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI + (V8SI "TARGET_AVX512VL") V16SI V8DI]) (define_mode_iterator VI1_AVX512VL_F - [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F && TARGET_EVEX512")]) + [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")]) (define_mode_iterator VI8_AVX2_AVX512BW - [(V8DI "TARGET_AVX512BW && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX2 [(V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX2_AVX512F - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI8_AVX_AVX512F - [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")]) + [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")]) (define_mode_iterator VI4_128_8_256 [V4SI V4DI]) ;; All V8D* modes (define_mode_iterator V8FI - [(V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V8DF V8DI]) ;; All V16S* modes (define_mode_iterator V16FI - [(V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")]) + [V16SF V16SI]) ;; ??? We should probably use TImode instead. (define_mode_iterator VIMAX_AVX2_AVX512BW - [(V4TI "TARGET_AVX512BW && TARGET_EVEX512") (V2TI "TARGET_AVX2") V1TI]) + [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) ;; Suppose TARGET_AVX512BW as baseline (define_mode_iterator VIMAX_AVX512VL - [(V4TI "TARGET_EVEX512") (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) + [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) (define_mode_iterator VIMAX_AVX2 [(V2TI "TARGET_AVX2") V1TI]) @@ -736,17 +700,17 @@ (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI12_AVX2_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) (define_mode_iterator VI24_AVX2 [(V16HI "TARGET_AVX2") V8HI (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI124_AVX2 [(V32QI "TARGET_AVX2") V16QI @@ -754,17 +718,17 @@ (V8SI "TARGET_AVX2") V4SI]) (define_mode_iterator VI248_AVX512VL - [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") + [V32HI V16SI V8DI (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI248_AVX512VLBW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") - (V16SI "TARGET_EVEX512") (V8SI 
"TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_mode_iterator VI48_AVX2 [(V8SI "TARGET_AVX2") V4SI @@ -776,17 +740,16 @@ (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512BW && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI]) + [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) (define_mode_iterator VI248_AVX512BW - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16SI "TARGET_EVEX512") - (V8DI "TARGET_EVEX512")]) + [(V32HI "TARGET_AVX512BW") V16SI V8DI]) (define_mode_iterator VI248_AVX512BW_AVX512VL - [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [(V32HI "TARGET_AVX512BW") + (V4DI "TARGET_AVX512VL") V16SI V8DI]) ;; Suppose TARGET_AVX512VL as baseline (define_mode_iterator VI248_AVX512BW_1 @@ -800,16 +763,16 @@ V4DI V2DI]) (define_mode_iterator VI48_AVX512F - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") V8SI V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI V2DI]) + [(V16SI "TARGET_AVX512F") V8SI V4SI + (V8DI "TARGET_AVX512F") V4DI V2DI]) (define_mode_iterator VI48_AVX_AVX512F - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI]) + [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) (define_mode_iterator VI12_AVX_AVX512F - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI]) (define_mode_iterator V48_128_256 [V4SF V2DF @@ -950,10 +913,10 @@ (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) (define_mode_iterator VI248_256 [V16HI V8SI V4DI]) (define_mode_iterator VI248_512 - [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V32HI V16SI V8DI]) (define_mode_iterator VI48_128 [V4SI V2DI]) (define_mode_iterator VI148_512 - [(V64QI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V64QI V16SI V8DI]) (define_mode_iterator VI148_256 [V32QI V8SI V4DI]) (define_mode_iterator VI148_128 [V16QI V4SI V2DI]) @@ -961,75 +924,62 @@ (define_mode_iterator VI124_256 [V32QI V16HI V8SI]) (define_mode_iterator VI124_256_AVX512F_AVX512BW [V32QI V16HI V8SI - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW") + (V16SI "TARGET_AVX512F")]) (define_mode_iterator VI48_256 [V8SI V4DI]) (define_mode_iterator VI48_512 - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [V16SI V8DI]) (define_mode_iterator VI4_256_8_512 [V8SI V8DI]) (define_mode_iterator VI_AVX512BW - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512")]) + [V16SI V8DI + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) (define_mode_iterator VIHFBF_AVX512BW - 
[(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") - (V32BF "TARGET_AVX512BW && TARGET_EVEX512")]) + [V16SI V8DI + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW") + (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")]) ;; Int-float size matches (define_mode_iterator VI2F_256_512 - [V16HI (V32HI "TARGET_EVEX512") - V16HF (V32HF "TARGET_EVEX512") - V16BF (V32BF "TARGET_EVEX512")]) + [V16HI V32HI V16HF V32HF V16BF V32BF]) (define_mode_iterator VI4F_128 [V4SI V4SF]) (define_mode_iterator VI8F_128 [V2DI V2DF]) (define_mode_iterator VI4F_256 [V8SI V8SF]) (define_mode_iterator VI8F_256 [V4DI V4DF]) (define_mode_iterator VI4F_256_512 - [V8SI V8SF - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512")]) + [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")]) (define_mode_iterator VI48F_256_512 [V8SI V8SF - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) + (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_mode_iterator VF48H_AVX512VL - [(V8DF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V8DF V16SF (V8SF "TARGET_AVX512VL")]) (define_mode_iterator VF48_128 [V2DF V4SF]) (define_mode_iterator VI48F - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") - (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") + [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_mode_iterator VI12_VI48F_AVX512VL - [(V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") + [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") - (V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") - (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) + V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) (define_mode_iterator V8_128 [V8HI V8HF V8BF]) (define_mode_iterator V16_256 [V16HI V16HF V16BF]) (define_mode_iterator V32_512 - [(V32HI "TARGET_EVEX512") (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")]) + [V32HI V32HF V32BF]) ;; Mapping from float mode to required SSE level (define_mode_attr sse @@ -1441,7 +1391,7 @@ ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) (define_mode_iterator AVX512MODE2P - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")]) + [V16SI V16SF V8DF]) ;; Mapping for dbpsabbw modes (define_mode_attr dbpsadbwmode @@ -1639,6 +1589,44 @@ "&& 1" [(set (match_dup 0) (match_dup 1))]) +(define_insn_and_split "*<avx512>_load<mode>mask_and15" + [(set (match_operand:V48_AVX512VL_4 0 
"register_operand" "=v") + (vec_merge:V48_AVX512VL_4 + (unspec:V48_AVX512VL_4 + [(match_operand:V48_AVX512VL_4 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:V48_AVX512VL_4 2 "nonimm_or_0_operand" "0C") + (and:QI + (match_operand:QI 3 "register_operand" "Yk") + (const_int 15))))] + "TARGET_AVX512F" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V48_AVX512VL_4 + (unspec:V48_AVX512VL_4 [(match_dup 1)] UNSPEC_MASKLOAD) + (match_dup 2) + (match_dup 3)))]) + +(define_insn_and_split "*<avx512>_load<mode>mask_and3" + [(set (match_operand:V8_AVX512VL_2 0 "register_operand" "=v") + (vec_merge:V8_AVX512VL_2 + (unspec:V8_AVX512VL_2 + [(match_operand:V8_AVX512VL_2 1 "memory_operand" "m")] + UNSPEC_MASKLOAD) + (match_operand:V8_AVX512VL_2 2 "nonimm_or_0_operand" "0C") + (and:QI + (match_operand:QI 3 "register_operand" "Yk") + (const_int 3))))] + "TARGET_AVX512F" + "#" + "&& 1" + [(set (match_dup 0) + (vec_merge:V8_AVX512VL_2 + (unspec:V8_AVX512VL_2 [(match_dup 1)] UNSPEC_MASKLOAD) + (match_dup 2) + (match_dup 3)))]) + (define_expand "<avx512>_load<mode>_mask" [(set (match_operand:VI12_AVX512VL 0 "register_operand") (vec_merge:VI12_AVX512VL @@ -2049,11 +2037,9 @@ (define_mode_iterator STORENT_MODE [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) (define_expand "storent<mode>" [(set (match_operand:STORENT_MODE 0 "memory_operand") @@ -2857,10 +2843,10 @@ }) (define_expand "div<mode>3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand") - (div:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand") - (match_operand:VBF_AVX10_2 2 "vector_operand")))] + [(set (match_operand:VBF 0 "register_operand") + (div:VBF + (match_operand:VBF 1 "register_operand") + (match_operand:VBF 2 "vector_operand")))] "TARGET_AVX10_2" { if (TARGET_RECIP_VEC_DIV @@ -3897,15 +3883,12 @@ (define_mode_iterator REDUC_PLUS_MODE [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512") + (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32QI "TARGET_AVX") (V16HI "TARGET_AVX") (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")]) (define_expand "reduc_plus_scal_<mode>" [(plus:REDUC_PLUS_MODE @@ -3948,13 +3931,11 @@ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") - (V64QI "TARGET_AVX512BW && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF 
"TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512BW") + (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F") + (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "reduc_<code>_scal_<mode>" [(smaxmin:REDUC_SMINMAX_MODE @@ -4063,10 +4044,8 @@ (define_mode_iterator REDUC_ANY_LOGIC_MODE [(V32QI "TARGET_AVX") (V16HI "TARGET_AVX") (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V64QI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512")]) + (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")]) (define_expand "reduc_<code>_scal_<mode>" [(any_logic:REDUC_ANY_LOGIC_MODE @@ -4410,7 +4389,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") - (match_operand:SI 3 "const_0_to_7_operand" "n")] + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) @@ -4428,7 +4407,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + (match_operand:SI 3 "<cmp_imm_predicate>")] UNSPEC_PCMP))) (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand") (unspec:<V48H_AVX512VL:avx512fmaskmode> @@ -4469,7 +4448,8 @@ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8 + && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -4480,6 +4460,70 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn "*<avx512>_cmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_ucmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_cmp<mode>3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v") + (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr 
"type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512vl_ucmpv2di3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimmediate_operand" "v") + (match_operand:V2DI 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4762,7 +4806,8 @@ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_7_operand")] UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split () + && GET_MODE_NUNITS (<MODE>mode) >= 8" "#" "&& 1" [(set (match_dup 0) @@ -4923,8 +4968,8 @@ (define_expand "vec_cmp<mode><avx512fmaskmodelower>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (match_operator:<avx512fmaskmode> 1 "" - [(match_operand:VBF_AVX10_2 2 "register_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))] + [(match_operand:VBF 2 "register_operand") + (match_operand:VBF 3 "nonimmediate_operand")]))] "TARGET_AVX10_2" { bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]), @@ -5142,7 +5187,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_256_AVX2 0 "register_operand") (vec_merge:VI_256_AVX2 - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5155,7 +5200,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_128 0 "register_operand") (vec_merge:VI_128 - (match_operand:VI_128 1 "vector_operand") + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") (match_operand:VI_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE2" @@ -5168,7 +5213,7 @@ (define_expand "vcond_mask_v1tiv1ti" [(set (match_operand:V1TI 0 "register_operand") (vec_merge:V1TI - (match_operand:V1TI 1 "vector_operand") + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") (match_operand:V1TI 2 "nonimm_or_0_operand") (match_operand:V1TI 3 "register_operand")))] "TARGET_SSE2" @@ -5181,7 +5226,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_256 0 "register_operand") (vec_merge:VF_256 - (match_operand:VF_256 1 "nonimmediate_operand") + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") (match_operand:VF_256 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5194,7 +5239,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 - (match_operand:VF_128 1 "vector_operand") + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") (match_operand:VF_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE" @@ -5573,7 +5618,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" 
"sselog") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") @@ -5630,7 +5675,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx_noavx512vl,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx_noavx512f,avx512vl,avx512f") (set_attr "addr" "*,gpr16,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") @@ -5703,7 +5748,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") @@ -5765,7 +5810,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -5811,15 +5856,10 @@ (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") - (V8BF "TARGET_AVX10_2") - (V16BF "TARGET_AVX10_2") - (V32BF "TARGET_AVX10_2")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (HF "TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") + (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32HF "TARGET_AVX512FP16") + (V8BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") (V32BF "TARGET_AVX10_2")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -5857,8 +5897,7 @@ (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) @@ -5928,14 +5967,12 @@ ;; Suppose AVX-512F as baseline (define_mode_iterator VFH_SF_AVX512VL - [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512") + [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (HF "TARGET_AVX512FP16") - SF (V16SF "TARGET_EVEX512") - (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") - DF (V8DF "TARGET_EVEX512") - (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) + SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") + DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v") @@ -8683,7 +8720,7 @@ (unspec:V16SI [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -8751,7 +8788,7 @@ (unspec:V16SI [(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTT_U))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttps2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ 
-8761,7 +8798,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_fix:V16SI (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9349,7 +9386,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtdq2pd\t{%t1, %0|%0, %t1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9385,7 +9422,7 @@ (unspec:V8SI [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_FIX_NOTRUNC))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9544,7 +9581,7 @@ (unspec:V8SI [(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTT_U))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttpd2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -9554,7 +9591,7 @@ [(set (match_operand:V8SI 0 "register_operand" "=v") (any_fix:V8SI (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10070,7 +10107,7 @@ [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10232,7 +10269,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2pd\t{%t1, %0|%0, %t1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -10438,7 +10475,7 @@ (set (match_operand:V8DF 0 "register_operand") (float_extend:V8DF (match_dup 2)))] -"TARGET_AVX512F && TARGET_EVEX512" +"TARGET_AVX512F" "operands[2] = gen_reg_rtx (V8SFmode);") (define_expand "vec_unpacks_lo_v4sf" @@ -10576,7 +10613,7 @@ (set (match_operand:V8DF 0 "register_operand") (float:V8DF (match_dup 2)))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "operands[2] = gen_reg_rtx (V8SImode);") (define_expand "vec_unpacks_float_lo_v16si" @@ -10588,7 +10625,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "vec_unpacku_float_hi_v4si" [(set (match_dup 5) @@ -10684,7 +10721,7 @@ (define_expand "vec_unpacku_float_hi_v16si" [(match_operand:V8DF 0 "register_operand") (match_operand:V16SI 1 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { REAL_VALUE_TYPE TWO32r; rtx k, x, tmp[4]; @@ -10733,7 +10770,7 @@ (define_expand "vec_unpacku_float_lo_v16si" [(match_operand:V8DF 0 "register_operand") (match_operand:V16SI 1 "nonimmediate_operand")] - "TARGET_AVX512F && 
TARGET_EVEX512" + "TARGET_AVX512F" { REAL_VALUE_TYPE TWO32r; rtx k, x, tmp[3]; @@ -10827,7 +10864,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V8DF 1 "nonimmediate_operand") (match_operand:V8DF 2 "nonimmediate_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx r1, r2; @@ -10942,7 +10979,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V8DF 1 "nonimmediate_operand") (match_operand:V8DF 2 "nonimmediate_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx r1, r2; @@ -11135,7 +11172,7 @@ (const_int 11) (const_int 27) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -11223,7 +11260,7 @@ (const_int 9) (const_int 25) (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -11363,7 +11400,7 @@ (const_int 11) (const_int 11) (const_int 13) (const_int 13) (const_int 15) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -11416,7 +11453,7 @@ (const_int 10) (const_int 10) (const_int 12) (const_int 12) (const_int 14) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") @@ -12376,9 +12413,7 @@ (V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")]) (define_mode_iterator AVX512_VEC - [(V8DF "TARGET_AVX512DQ && TARGET_EVEX512") - (V8DI "TARGET_AVX512DQ && TARGET_EVEX512") - (V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")]) + [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI]) (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask" [(match_operand:<ssequartermode> 0 "nonimmediate_operand") @@ -12547,9 +12582,7 @@ [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")]) (define_mode_iterator AVX512_VEC_2 - [(V16SF "TARGET_AVX512DQ && TARGET_EVEX512") - (V16SI "TARGET_AVX512DQ && TARGET_EVEX512") - (V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")]) + [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI]) (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask" [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") @@ -13110,7 +13143,7 @@ (const_int 26) (const_int 27) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { if (TARGET_AVX512VL @@ -13159,7 +13192,7 @@ (const_int 58) (const_int 59) (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" [(set_attr "type" "sselog1") (set_attr "length_immediate" "1") @@ -13257,15 +13290,15 @@ ;; Modes handled by vec_extract patterns. 
(define_mode_iterator VEC_EXTRACT_MODE - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")]) + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) (define_expand "vec_extract<mode><ssescalarmodelower>" [(match_operand:<ssescalarmode> 0 "register_operand") @@ -13307,7 +13340,7 @@ (const_int 3) (const_int 11) (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -13421,9 +13454,9 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "V8DF")]) @@ -13437,7 +13470,7 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -13454,7 +13487,7 @@ (const_int 2) (const_int 6)])))] "TARGET_AVX && <mask_avx512vl_condition>" "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" - [(set_attr "type" "sselog1") + [(set_attr "type" "ssemov") (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "V4DF")]) @@ -13649,7 +13682,7 @@ (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_VTERNLOG))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) /* Disallow embeded broadcast for vector HFmode since it's not real AVX512FP16 instruction. 
*/ && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4 @@ -13731,7 +13764,7 @@ [(set (match_operand:V 0 "register_operand") (match_operand:V 1 "ternlog_operand"))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split ()" "#" "&& 1" @@ -13761,7 +13794,7 @@ (match_operand:V 3 "regmem_or_bitnot_regmem_operand") (match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -13846,7 +13879,7 @@ (match_operand:V 3 "regmem_or_bitnot_regmem_operand")) (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -13930,7 +13963,7 @@ (match_operand:V 2 "regmem_or_bitnot_regmem_operand")) (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split ()" "#" "&& 1" @@ -14080,7 +14113,7 @@ (match_operand:SI 3 "const_0_to_255_operand") (match_operand:V16SF 4 "register_operand") (match_operand:HI 5 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[3]); emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2], @@ -14267,7 +14300,7 @@ (match_operand 16 "const_12_to_15_operand") (match_operand 17 "const_28_to_31_operand") (match_operand 18 "const_28_to_31_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) @@ -14302,7 +14335,7 @@ (match_operand:SI 3 "const_0_to_255_operand") (match_operand:V8DF 4 "register_operand") (match_operand:QI 5 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[3]); emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2], @@ -14332,7 +14365,7 @@ (match_operand 8 "const_12_to_13_operand") (match_operand 9 "const_6_to_7_operand") (match_operand 10 "const_14_to_15_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask; mask = INTVAL (operands[3]); @@ -14464,7 +14497,7 @@ (const_int 3) (const_int 11) (const_int 5) (const_int 13) (const_int 7) (const_int 15)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -14514,7 +14547,7 @@ (const_int 2) (const_int 10) (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -14880,7 +14913,7 @@ (set_attr "mode" "V2DF,DF,V8DF") (set (attr "enabled") (cond [(eq_attr "alternative" "2") - (symbol_ref "TARGET_AVX512F && TARGET_EVEX512 + (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL && 
!TARGET_PREFER_AVX256") (match_test "<mask_avx512vl_condition>") (const_string "*") @@ -14965,13 +14998,13 @@ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand") (truncate:PMOV_DST_MODE_1 (match_operand:<pmov_src_mode> 1 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "*avx512f_<code><pmov_src_lower><mode>2" [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_1 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -14993,7 +15026,7 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] - "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512BW && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15018,7 +15051,7 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] - "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512BW && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15102,7 +15135,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15118,7 +15151,7 @@ (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")) (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15132,19 +15165,19 @@ (match_operand:<pmov_src_mode> 1 "register_operand")) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "truncv32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand") (truncate:V32QI (match_operand:V32HI 1 "register_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_insn "avx512bw_<code>v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI (match_operand:V32HI 1 "register_operand" "v,v")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15174,7 +15207,7 @@ (const_int 26) (const_int 27) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512VBMI && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512VBMI && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -15190,7 +15223,7 @@ (match_operand:V32HI 1 "register_operand" "v,v")) (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0") (match_operand:SI 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "none,store") @@ -15204,7 +15237,7 @@ (match_operand:V32HI 1 "register_operand")) (match_dup 0) (match_operand:SI 2 "register_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_mode_iterator PMOV_DST_MODE_2 [V4SI V8HI (V16QI "TARGET_AVX512BW")]) @@ -16062,7 +16095,7 @@ [(set 
(match_operand:V8QI 0 "register_operand") (truncate:V8QI (match_operand:V8DI 1 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx op0 = gen_reg_rtx (V16QImode); @@ -16082,7 +16115,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16092,7 +16125,7 @@ [(set (match_operand:V8QI 0 "memory_operand" "=m") (any_truncate:V8QI (match_operand:V8DI 1 "register_operand" "v")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") @@ -16104,7 +16137,7 @@ (subreg:DI (any_truncate:V8QI (match_operand:V8DI 1 "register_operand")) 0))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -16128,7 +16161,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16149,7 +16182,7 @@ (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -16162,7 +16195,7 @@ (match_operand:V8DI 1 "register_operand" "v")) (match_dup 0) (match_operand:QI 2 "register_operand" "Yk")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") @@ -16174,7 +16207,7 @@ (any_truncate:V8QI (match_operand:V8DI 1 "register_operand")) (match_operand:QI 2 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { operands[0] = adjust_address_nv (operands[0], V8QImode, 0); emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0], @@ -16431,7 +16464,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") (define_insn "*vec_widen_umult_even_v16si<mask_name>" @@ -16451,7 +16484,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") @@ -16547,7 +16580,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") (define_insn "*vec_widen_smult_even_v16si<mask_name>" @@ -16567,7 +16600,7 @@ (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") @@ -16969,7 +17002,7 @@ "TARGET_SSE2" { /* Try with vnni instructions. 
*/ - if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI && TARGET_EVEX512) + if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI) || (<MODE_SIZE> < 64 && ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI))) { @@ -17062,7 +17095,7 @@ (match_operand:V64QI 1 "register_operand") (match_operand:V64QI 2 "nonimmediate_operand") (match_operand:V16SI 3 "nonimmediate_operand")] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" { rtx t1 = gen_reg_rtx (V8DImode); rtx t2 = gen_reg_rtx (V16SImode); @@ -18300,13 +18333,10 @@ (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") (V16HF "TARGET_AVX512FP16") - (V16SF "TARGET_AVX512F && TARGET_EVEX512") - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V16SI "TARGET_AVX512F && TARGET_EVEX512") - (V8DI "TARGET_AVX512F && TARGET_EVEX512") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") - (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")]) + (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") + (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") + (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI") + (V32HF "TARGET_AVX512FP16")]) (define_expand "vec_perm<mode>" [(match_operand:VEC_PERM_AVX2 0 "register_operand") @@ -18333,7 +18363,7 @@ { operands[2] = CONSTM1_RTX (<MODE>mode); - if (!TARGET_AVX512F || (!TARGET_AVX512VL && !TARGET_EVEX512)) + if (!TARGET_AVX512F) operands[2] = force_reg (<MODE>mode, operands[2]); }) @@ -18342,7 +18372,6 @@ (xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br") (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))] "TARGET_AVX512F - && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512) && (!<mask_applied> || <ssescalarmode>mode == SImode || <ssescalarmode>mode == DImode)" @@ -18409,7 +18438,7 @@ (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC"))) (unspec [(match_operand:VI 3 "register_operand" "0,0,0")] UNSPEC_INSN_FALSE_DEP)] - "TARGET_AVX512F && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)" + "TARGET_AVX512F" { if (TARGET_AVX512VL) return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}"; @@ -18433,7 +18462,7 @@ (not:<ssescalarmode> (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 0) (xor:VI48_AVX512F (vec_duplicate:VI48_AVX512F (match_dup 1)) @@ -18587,8 +18616,7 @@ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") (eq_attr "alternative" "4") (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 - && !TARGET_PREFER_AVX256)") + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") ] (const_string "*")))]) @@ -18632,7 +18660,7 @@ (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) (match_operand:VI 2 "vector_operand")))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 3) (vec_duplicate:VI (match_dup 1))) (set (match_dup 0) @@ -18647,7 +18675,7 @@ (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) (match_operand:VI 2 "vector_operand")))] "<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)" + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" [(set (match_dup 3) (vec_duplicate:VI (match_dup 1))) (set (match_dup 0) @@ -18941,7 +18969,7 @@ (match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr")) 
(match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -18974,7 +19002,7 @@ (match_operand:VI 1 "bcst_vector_operand" "%0, 0") (match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -19005,7 +19033,7 @@ (not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0")) (not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))] "(<MODE_SIZE> == 64 || TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && (register_operand (operands[1], <MODE>mode) || register_operand (operands[2], <MODE>mode))" { @@ -19027,7 +19055,7 @@ (const_string "*")))]) (define_mode_iterator AVX512ZEXTMASK - [(DI "TARGET_AVX512BW && TARGET_EVEX512") (SI "TARGET_AVX512BW") HI]) + [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") @@ -19276,7 +19304,7 @@ (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "<mask_prefix>") @@ -19345,7 +19373,7 @@ (const_int 14) (const_int 15) (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "<mask_prefix>") @@ -19407,7 +19435,7 @@ (const_int 61) (const_int 125) (const_int 62) (const_int 126) (const_int 63) (const_int 127)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19503,7 +19531,7 @@ (const_int 53) (const_int 117) (const_int 54) (const_int 118) (const_int 55) (const_int 119)])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19727,7 +19755,7 @@ (const_int 11) (const_int 27) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -19782,7 +19810,7 @@ (const_int 9) (const_int 25) (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -20488,7 +20516,7 @@ (match_operand:SI 2 "const_0_to_255_operand") (match_operand:V16SI 3 "register_operand") (match_operand:HI 4 "register_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int mask = INTVAL (operands[2]); emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1], @@ -20532,7 +20560,7 @@ 
(match_operand 15 "const_12_to_15_operand") (match_operand 16 "const_12_to_15_operand") (match_operand 17 "const_12_to_15_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512 + "TARGET_AVX512F && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) @@ -20698,7 +20726,7 @@ [(match_operand:V32HI 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_PSHUFLW))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -20874,7 +20902,7 @@ [(match_operand:V32HI 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_PSHUFHW))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -21408,7 +21436,7 @@ (match_operand:V4TI 1 "register_operand" "v") (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vextracti32x4\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "length_immediate" "1") @@ -21416,7 +21444,7 @@ (set_attr "mode" "XI")]) (define_mode_iterator VEXTRACTI128_MODE - [(V4TI "TARGET_AVX512F && TARGET_EVEX512") V2TI]) + [(V4TI "TARGET_AVX512F") V2TI]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand") @@ -21439,7 +21467,7 @@ && VECTOR_MODE_P (GET_MODE (operands[1])) && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16) || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32) - || (TARGET_AVX512F && TARGET_EVEX512 + || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64)) && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))" [(set (match_dup 0) (vec_select:SWI48x (match_dup 1) @@ -22814,7 +22842,7 @@ (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1))))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") (set_attr "prefix" "evex") @@ -23328,10 +23356,10 @@ ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI ;; modes for abs instruction on pre AVX-512 targets. 
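The recurring edit in the hunks above and below is dropping TARGET_EVEX512 from insn conditions and mode iterators, so 512-bit patterns are gated only on the base ISA (AVX512F/BW/VNNI and friends). A minimal C-level sketch of the user-visible effect — illustrative only, not part of the patch, and the function name is made up:

    #include <immintrin.h>

    /* With the EVEX512 gate removed, plain "avx512f" is enough to use
       512-bit (ZMM) intrinsics; previously the intrinsic headers asked
       for "avx512f,evex512" (compare the vaesintrin.h hunk later in
       this diff).  */
    __attribute__ ((target ("avx512f")))
    static __m512i
    add_epi32_512 (__m512i a, __m512i b)
    {
      return _mm512_add_epi32 (a, b);
    }
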
(define_mode_iterator VI1248_AVX512VL_AVX512BW - [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX512VL") + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_insn "*abs<mode>2" @@ -24159,7 +24187,7 @@ [(set (match_operand:V32HI 0 "register_operand" "=v") (any_extend:V32HI (match_operand:V32QI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24173,7 +24201,7 @@ (match_operand:V64QI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))] @@ -24193,7 +24221,7 @@ (match_operand:V64QI 3 "const0_operand")) (match_parallel 4 "pmovzx_parallel" [(match_operand 5 "const_int_operand")])))] - "TARGET_AVX512BW && TARGET_EVEX512" + "TARGET_AVX512BW" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))] @@ -24206,7 +24234,7 @@ [(set (match_operand:V32HI 0 "register_operand") (any_extend:V32HI (match_operand:V32QI 1 "nonimmediate_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512") + "TARGET_AVX512BW") (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") @@ -24354,7 +24382,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24364,7 +24392,7 @@ [(set (match_operand:V16SI 0 "register_operand") (any_extend:V16SI (match_operand:V16QI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v8qiv8si2<mask_name>" [(set (match_operand:V8SI 0 "register_operand" "=v") @@ -24497,7 +24525,7 @@ [(set (match_operand:V16SI 0 "register_operand" "=v") (any_extend:V16SI (match_operand:V16HI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24507,7 +24535,7 @@ [(set (match_operand:V16SI 0 "register_operand") (any_extend:V16SI (match_operand:V16HI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1" [(set (match_operand:V32HI 0 "register_operand" "=v") @@ -24517,7 +24545,7 @@ (match_operand:V32HI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))] @@ -24741,7 +24769,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" 
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24751,7 +24779,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8QI 1 "memory_operand" "m")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24769,7 +24797,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] - "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -24780,7 +24808,7 @@ [(set (match_operand:V8DI 0 "register_operand") (any_extend:V8DI (match_operand:V8QI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { if (!MEM_P (operands[1])) { @@ -24922,7 +24950,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8HI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -24932,7 +24960,7 @@ [(set (match_operand:V8DI 0 "register_operand") (any_extend:V8DI (match_operand:V8HI 1 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v4hiv4di2<mask_name>" [(set (match_operand:V4DI 0 "register_operand" "=v") @@ -25059,7 +25087,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") @@ -25073,7 +25101,7 @@ (match_operand:V16SI 2 "const0_operand")) (match_parallel 3 "pmovzx_parallel" [(match_operand 4 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))] @@ -25092,7 +25120,7 @@ (match_operand:V16SI 3 "const0_operand")) (match_parallel 4 "pmovzx_parallel" [(match_operand 5 "const_int_operand")])))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "#" "&& reload_completed" [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))] @@ -25104,7 +25132,7 @@ [(set (match_operand:V8DI 0 "register_operand" "=v") (any_extend:V8DI (match_operand:V8SI 1 "nonimmediate_operand" "vm")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_insn "avx2_<code>v4siv4di2<mask_name>" [(set (match_operand:V4DI 0 "register_operand" "=v") @@ -25505,7 +25533,7 @@ [(match_operand:V16SI 0 "register_operand") (match_operand:V16SF 1 "nonimmediate_operand") (match_operand:SI 2 "const_0_to_15_operand")] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { rtx tmp = gen_reg_rtx (V16SFmode); emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2])); @@ -26723,7 +26751,7 @@ (ashiftrt:V8DI (match_operand:V8DI 1 "register_operand") (match_operand:V8DI 2 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") + "TARGET_AVX512F") (define_expand "vashrv4di3" [(set (match_operand:V4DI 0 "register_operand") @@ -26814,7 +26842,7 @@ [(set (match_operand:V16SI 0 "register_operand") (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand") (match_operand:V16SI 2 "nonimmediate_operand")))] - "TARGET_AVX512F && TARGET_EVEX512") 
+ "TARGET_AVX512F") (define_expand "vashrv8si3" [(set (match_operand:V8SI 0 "register_operand") @@ -27257,12 +27285,12 @@ (set_attr "mode" "OI")]) (define_mode_attr pbroadcast_evex_isa - [(V64QI "avx512bw_512") (V32QI "avx512bw") (V16QI "avx512bw") - (V32HI "avx512bw_512") (V16HI "avx512bw") (V8HI "avx512bw") - (V16SI "avx512f_512") (V8SI "avx512f") (V4SI "avx512f") - (V8DI "avx512f_512") (V4DI "avx512f") (V2DI "avx512f") - (V32HF "avx512bw_512") (V16HF "avx512bw") (V8HF "avx512bw") - (V32BF "avx512bw_512") (V16BF "avx512bw") (V8BF "avx512bw")]) + [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw") + (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw") + (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f") + (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f") + (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw") + (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")]) (define_insn "avx2_pbroadcast<mode>" [(set (match_operand:VIHFBF 0 "register_operand" "=x,v") @@ -27806,7 +27834,7 @@ (set (attr "enabled") (if_then_else (eq_attr "alternative" "1") (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL - && TARGET_EVEX512 && !TARGET_PREFER_AVX256") + && !TARGET_PREFER_AVX256") (const_string "*")))]) (define_insn "*vec_dupv4si" @@ -27834,7 +27862,7 @@ (set (attr "enabled") (if_then_else (eq_attr "alternative" "1") (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL - && TARGET_EVEX512 && !TARGET_PREFER_AVX256") + && !TARGET_PREFER_AVX256") (const_string "*")))]) (define_insn "*vec_dupv2di" @@ -27849,7 +27877,7 @@ %vmovddup\t{%1, %0|%0, %1} movlhps\t%0, %0" [(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx") - (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov") + (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov") (set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig") (set (attr "mode") (cond [(and (eq_attr "alternative" "2") @@ -27865,8 +27893,7 @@ (if_then_else (eq_attr "alternative" "2") (symbol_ref "TARGET_AVX512VL - || (TARGET_AVX512F && TARGET_EVEX512 - && !TARGET_PREFER_AVX256)") + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") (const_string "*")))]) (define_insn "avx2_vbroadcasti128_<mode>" @@ -27946,7 +27973,7 @@ [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_evex") - (set_attr "isa" "avx2,noavx2,avx2,avx512f_512,noavx2") + (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2") (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")]) (define_split @@ -28010,8 +28037,8 @@ ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. 
(define_mode_iterator VI4F_BRCST32x2 - [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") - (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V16SF (V8SF "TARGET_AVX512VL")]) (define_mode_attr 64x2mode [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")]) @@ -28061,8 +28088,7 @@ ;; For broadcast[i|f]64x2 (define_mode_iterator VI8F_BRCST64x2 - [(V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) + [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v") @@ -28118,27 +28144,26 @@ (set_attr "mode" "<sseinsnmode>")]) (define_mode_iterator VPERMI2 - [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") - (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512") + [V16SI V16SF V8DI V8DF (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) (define_mode_iterator VPERMI2I - [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512") + [V16SI V8DI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") - (V32HI "TARGET_AVX512BW && TARGET_EVEX512") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL") (V8HI "TARGET_AVX512BW && TARGET_AVX512VL") - (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512") + (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL") (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")]) @@ -28813,29 +28838,28 @@ ;; Modes handled by vec_init expanders. (define_mode_iterator VEC_INIT_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") - (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") - (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2") + (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")]) ;; Likewise, but for initialization from half sized vectors. ;; Thus, these are all VEC_INIT_MODE modes except V2??. 
(define_mode_iterator VEC_INIT_HALF_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") - (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF - (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF - (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF - (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") - (V4TI "TARGET_AVX512F && TARGET_EVEX512")]) + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") + (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF + (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF + (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") + (V4TI "TARGET_AVX512F")]) (define_expand "vec_init<mode><ssescalarmodelower>" [(match_operand:VEC_INIT_MODE 0 "register_operand") @@ -29096,7 +29120,7 @@ (unspec:V16SF [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_VCVTPH2PS))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -29186,7 +29210,7 @@ UNSPEC_VCVTPS2PH) (match_operand:V16HI 3 "nonimm_or_0_operand") (match_operand:HI 4 "register_operand")))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" { int round = INTVAL (operands[2]); /* Separate {sae} from rounding control imm, @@ -29205,7 +29229,7 @@ [(match_operand:V16SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_VCVTPS2PH))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -29217,7 +29241,7 @@ [(match_operand:V16SF 1 "register_operand" "v") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_VCVTPS2PH))] - "TARGET_AVX512F && TARGET_EVEX512" + "TARGET_AVX512F" "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") @@ -30196,7 +30220,7 @@ (match_operand:V8DI 2 "register_operand" "v") (match_operand:V8DI 3 "nonimmediate_operand" "vm")] VPMADD52))] - "TARGET_AVX512IFMA && TARGET_EVEX512" + "TARGET_AVX512IFMA" "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "prefix" "evex") @@ -30567,7 +30591,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPBUSD))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpbusd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30636,7 +30660,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPBUSDS))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpbusds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30705,7 +30729,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPWSSD))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + 
"TARGET_AVX512VNNI" "vpdpwssd\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30774,7 +30798,7 @@ (match_operand:V16SI 2 "register_operand" "v") (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPDPWSSDS))] - "TARGET_AVX512VNNI && TARGET_EVEX512" + "TARGET_AVX512VNNI" "vpdpwssds\t{%3, %2, %0|%0, %2, %3}" [(set_attr ("prefix") ("evex"))]) @@ -30930,8 +30954,7 @@ (set_attr "mode" "<sseinsnmode>")]) (define_mode_iterator VI48_AVX512VP2VL - [(V8DI "TARGET_EVEX512") - (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") + [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) (define_mode_iterator MASK_DWI [P2QI P2HI]) @@ -30973,12 +30996,12 @@ (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v") (match_operand:V16SI 2 "vector_operand" "vm")] UNSPEC_VP2INTERSECT))] - "TARGET_AVX512VP2INTERSECT && TARGET_EVEX512" + "TARGET_AVX512VP2INTERSECT" "vp2intersectd\t{%2, %1, %0|%0, %1, %2}" [(set_attr ("prefix") ("evex"))]) (define_mode_iterator VF_AVX512BF16VL - [(V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) + [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) ;; Converting from BF to SF (define_mode_attr bf16_cvt_2sf [(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")]) @@ -31098,7 +31121,7 @@ "vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}") (define_mode_iterator VF1_AVX512_256 - [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")]) + [V16SF (V8SF "TARGET_AVX512VL")]) (define_expand "avx512f_cvtneps2bf16_<mode>_maskz" [(match_operand:<sf_cvt_bf16> 0 "register_operand") @@ -31144,7 +31167,7 @@ [(set (match_operand:V16BF 0 "register_operand") (float_truncate:V16BF (match_operand:V16SF 1 "nonimmediate_operand")))] - "TARGET_AVX512BW && TARGET_EVEX512 + "TARGET_AVX512BW && !HONOR_NANS (BFmode) && !flag_rounding_math && (flag_unsafe_math_optimizations || TARGET_AVX512BF16)" { @@ -31428,10 +31451,10 @@ ;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15 (define_mode_iterator INT_BROADCAST_MODE - [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI - (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI - (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI - (V8DI "TARGET_AVX512F && TARGET_EVEX512 && TARGET_64BIT") + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI + (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI + (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI + (V8DI "TARGET_AVX512F && TARGET_64BIT") (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")]) ;; Broadcast from an integer. 
NB: Enable broadcast only if we can move @@ -31705,8 +31728,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>" - [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v") - (vec_concat:VHF_AVX10_2 + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (vec_concat:VHF_AVX512VL (float_truncate:<ssehalfvecmode> (match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>")) (float_truncate:<ssehalfvecmode> @@ -31730,8 +31753,8 @@ (define_insn "vcvt<convertfp8_pack><mode><mask_name>" [(set (match_operand:<ssebvecmode> 0 "register_operand" "=v") (unspec:<ssebvecmode> - [(match_operand:VHF_AVX10_2 1 "register_operand" "v") - (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")] + [(match_operand:VHF_AVX512VL 1 "register_operand" "v") + (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_CONVERTFP8_PACK))] "TARGET_AVX10_2" "vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand2>, %1, %2}" @@ -31814,7 +31837,7 @@ [(set_attr "prefix" "evex")]) (define_mode_iterator VHF_AVX10_2_2 - [(V32HF "TARGET_AVX10_2") V16HF]) + [V32HF V16HF]) (define_insn "vcvt<biasph2fp8_pack><mode><mask_name>" [(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v") @@ -31911,8 +31934,8 @@ [(set_attr "prefix" "evex")]) (define_insn "vcvthf82ph<mode><mask_name>" - [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v") - (unspec:VHF_AVX10_2 + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (unspec:VHF_AVX512VL [(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")] UNSPEC_VCVTHF82PH))] "TARGET_AVX10_2" @@ -31934,8 +31957,8 @@ (define_expand "usdot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX10_2 1 "register_operand") - (match_operand:VI2_AVX10_2 2 "register_operand") + (match_operand:VI2_AVX512F 1 "register_operand") + (match_operand:VI2_AVX512F 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { @@ -31952,8 +31975,8 @@ (define_expand "udot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX10_2 1 "register_operand") - (match_operand:VI2_AVX10_2 2 "register_operand") + (match_operand:VI2_AVX512F 1 "register_operand") + (match_operand:VI2_AVX512F 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { @@ -32032,23 +32055,23 @@ [(set_attr "prefix" "evex")]) (define_insn "vdpphps_<mode>" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS))] "TARGET_AVX10_2" "vdpphps\t{%3, %2, %0|%0, %2, %3}" [(set_attr "prefix" "evex")]) (define_insn "vdpphps_<mode>_mask" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (vec_merge:VF1_AVX10_2 - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 
0 "register_operand" "=v") + (vec_merge:VF1_AVX512VL + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] @@ -32057,10 +32080,10 @@ [(set_attr "prefix" "evex")]) (define_expand "vdpphps_<mode>_maskz" - [(match_operand:VF1_AVX10_2 0 "register_operand") - (match_operand:VF1_AVX10_2 1 "register_operand") - (match_operand:VF1_AVX10_2 2 "register_operand") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VF1_AVX512VL 0 "register_operand") + (match_operand:VF1_AVX512VL 1 "register_operand") + (match_operand:VF1_AVX512VL 2 "register_operand") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32070,60 +32093,60 @@ }) (define_insn "vdpphps_<mode>_maskz_1" - [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v") - (vec_merge:VF1_AVX10_2 - (unspec:VF1_AVX10_2 - [(match_operand:VF1_AVX10_2 1 "register_operand" "0") - (match_operand:VF1_AVX10_2 2 "register_operand" "v") - (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") + (vec_merge:VF1_AVX512VL + (unspec:VF1_AVX512VL + [(match_operand:VF1_AVX512VL 1 "register_operand" "0") + (match_operand:VF1_AVX512VL 2 "register_operand" "v") + (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")] UNSPEC_VDPPHPS) - (match_operand:VF1_AVX10_2 4 "const0_operand" "C") + (match_operand:VF1_AVX512VL 4 "const0_operand" "C") (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] "TARGET_AVX10_2" "vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_scalefbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")] UNSPEC_VSCALEFBF16))] "TARGET_AVX10_2" "vscalefbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) (define_expand "<code><mode>3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand") - (smaxmin:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))] + [(set (match_operand:VBF 0 "register_operand") + (smaxmin:VBF + (match_operand:VBF 1 "register_operand") + (match_operand:VBF 2 "nonimmediate_operand")))] "TARGET_AVX10_2") (define_insn "avx10_2_<code>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (smaxmin:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (smaxmin:VBF + (match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) (define_insn "avx10_2_<insn>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (plusminusmultdiv:VBF_AVX10_2 - 
(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (plusminusmultdiv:VBF + (match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "prefix" "evex")]) (define_expand "avx10_2_fmaddbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32135,11 +32158,11 @@ }) (define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2" "@ vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32150,12 +32173,12 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmaddbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32167,12 +32190,12 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmaddbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (match_operand:VBF 3 "nonimmediate_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32182,10 +32205,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fnmaddbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 
"nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32197,12 +32220,12 @@ }) (define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))] "TARGET_AVX10_2" "@ vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32213,13 +32236,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmaddbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (match_operand:VBF 3 "nonimmediate_operand" "v,vm")) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32231,13 +32254,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmaddbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (match_operand:VBF 3 "nonimmediate_operand" "0")) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32247,10 +32270,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fmsubbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32262,12 +32285,12 @@ }) (define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v") + (match_operand:VBF 2 "nonimmediate_operand" 
"vm,v,vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2" "@ vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32278,13 +32301,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmsubbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0") + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32296,13 +32319,13 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fmsubbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32312,10 +32335,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "avx10_2_fnmsubbf16_<mode>_maskz" - [(match_operand:VBF_AVX10_2 0 "register_operand") - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand") - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand") + [(match_operand:VBF 0 "register_operand") + (match_operand:VBF 1 "nonimmediate_operand") + (match_operand:VBF 2 "nonimmediate_operand") + (match_operand:VBF 3 "nonimmediate_operand") (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX10_2" { @@ -32327,13 +32350,13 @@ }) (define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v") - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))] + [(set (match_operand:VBF 0 "register_operand" "=v,v,v") + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))] "TARGET_AVX10_2" "@ vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2} @@ -32344,14 +32367,14 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmsubbf16_<mode>_mask" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))) + [(set (match_operand:VBF 0 "register_operand" "=v,v") + 
(vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "0,0")) + (match_operand:VBF 2 "nonimmediate_operand" "vm,v") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))) (match_dup 1) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] "TARGET_AVX10_2" @@ -32363,14 +32386,14 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_fnmsubbf16_<mode>_mask3" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (vec_merge:VBF_AVX10_2 - (fma:VBF_AVX10_2 - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")) - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") - (neg:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))) + [(set (match_operand:VBF 0 "register_operand" "=v") + (vec_merge:VBF + (fma:VBF + (neg:VBF + (match_operand:VBF 1 "nonimmediate_operand" "%v")) + (match_operand:VBF 2 "nonimmediate_operand" "vm") + (neg:VBF + (match_operand:VBF 3 "nonimmediate_operand" "0"))) (match_dup 3) (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] "TARGET_AVX10_2" @@ -32380,35 +32403,35 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_rsqrtbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_RSQRT))] "TARGET_AVX10_2" "vrsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_sqrtbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (sqrt:VBF_AVX10_2 - (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))] + [(set (match_operand:VBF 0 "register_operand" "=v") + (sqrt:VBF + (match_operand:VBF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2" "vsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_rcpbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_RCP))] "TARGET_AVX10_2" "vrcpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "prefix" "evex")]) (define_insn "avx10_2_getexpbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm")] UNSPEC_GETEXP))] "TARGET_AVX10_2" "vgetexpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" @@ -32425,9 +32448,9 @@ (UNSPEC_VGETMANTBF16 "getmant")]) (define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm") + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "nonimmediate_operand" "vm") (match_operand:SI 2 "const_0_to_255_operand")] BF16IMMOP))] "TARGET_AVX10_2" @@ -32437,7 +32460,7 @@ (define_insn "avx10_2_fpclassbf16_<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VBF_AVX10_2 1 
"nonimmediate_operand" "vm") + [(match_operand:VBF 1 "nonimmediate_operand" "vm") (match_operand 2 "const_0_to_255_operand")] UNSPEC_VFPCLASSBF16))] "TARGET_AVX10_2" @@ -32447,8 +32470,8 @@ (define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm") + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "nonimmediate_operand" "vm") (match_operand 3 "const_0_to_31_operand" "n")] UNSPEC_PCMP))] "TARGET_AVX10_2" @@ -32486,7 +32509,7 @@ (define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs<mode><mask_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")] + [(match_operand:VBF 1 "vector_operand" "vm")] UNSPEC_CVT_BF16_IBS_ITER))] "TARGET_AVX10_2" "vcvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" @@ -32501,7 +32524,7 @@ (define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + [(match_operand:VHF_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_CVT_PH_IBS_ITER))] "TARGET_AVX10_2 && <round_mode512bit_condition>" "vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" @@ -32516,7 +32539,7 @@ (define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VHF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_CVTT_PH_IBS_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32531,7 +32554,7 @@ (define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")] + [(match_operand:VF1_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_CVT_PS_IBS_ITER))] "TARGET_AVX10_2 && <round_mode512bit_condition>" "vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" @@ -32546,7 +32569,7 @@ (define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") (unspec:<sseintvecmode> - [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF1_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_CVTT_PS_IBS_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32565,7 +32588,7 @@ (define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>" [(set 
(match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v") (unspec:<VEC_GATHER_IDXSI> - [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32576,7 +32599,7 @@ (define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" [(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v") (unspec:<VEC_GATHER_IDXDI> - [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] + [(match_operand:VF2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" "vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" @@ -32585,8 +32608,8 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v") - (unspec:VI8_AVX10_2 + [(set (match_operand:VI8 0 "register_operand" "=v") + (unspec:VI8 [(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] UNSPEC_SAT_CVT_DS_SIGN_ITER))] "TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>" @@ -32622,10 +32645,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "avx10_2_minmaxbf16_<mode><mask_name>" - [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v") - (unspec:VBF_AVX10_2 - [(match_operand:VBF_AVX10_2 1 "register_operand" "v") - (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr") + [(set (match_operand:VBF 0 "register_operand" "=v") + (unspec:VBF + [(match_operand:VBF 1 "register_operand" "v") + (match_operand:VBF 2 "bcst_vector_operand" "vmBr") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_MINMAXBF16))] "TARGET_AVX10_2" @@ -32634,10 +32657,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>" - [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v") - (unspec:VFH_AVX10_2 - [(match_operand:VFH_AVX10_2 1 "register_operand" "v") - (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v") + (unspec:VFH_AVX512VL + [(match_operand:VFH_AVX512VL 1 "register_operand" "v") + (match_operand:VFH_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_MINMAX))] "TARGET_AVX10_2" @@ -32661,9 +32684,9 @@ (set_attr "mode" "<ssescalarmode>")]) (define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>" - [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v") - (unspec:VI1248_AVX10_2 - [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")] + [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand" "=v") + (unspec:VI1248_AVX512VLBW + [(match_operand:VI1248_AVX512VLBW 1 "memory_operand" "m")] UNSPEC_VMOVRS))] "TARGET_AVX10_2 && TARGET_MOVRS" "vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h index 15d8e96..64f3c20 100644 --- 
a/gcc/config/i386/vaesintrin.h +++ b/gcc/config/i386/vaesintrin.h @@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B) #endif /* __DISABLE_VAES__ */ -#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__) +#if !defined(__VAES__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("vaes,avx512f,evex512") +#pragma GCC target("vaes,avx512f") #define __DISABLE_VAESF__ #endif /* __VAES__ */ diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h index 2b36c37..a02ab38 100644 --- a/gcc/config/i386/vpclmulqdqintrin.h +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -28,9 +28,9 @@ #ifndef _VPCLMULQDQINTRIN_H_INCLUDED #define _VPCLMULQDQINTRIN_H_INCLUDED -#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__) +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) #pragma GCC push_options -#pragma GCC target("vpclmulqdq,avx512f,evex512") +#pragma GCC target("vpclmulqdq,avx512f") #define __DISABLE_VPCLMULQDQF__ #endif /* __VPCLMULQDQF__ */ diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..c8603b9 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ in 128bit, 256bit and 512bit */ 4, 4, 6, /* cost of moving XMM,YMM,ZMM register */ 4, /* cost of moving SSE register to integer. */ + 4, /* cost of moving integer register to SSE. */ COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */ COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -121,16 +122,24 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (2), /* cost of FCHS instruction. */ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ - COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ - COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_BYTES (2), /* cost of MULSS instruction. */ - COSTS_N_BYTES (2), /* cost of MULSD instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ - COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ - COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ - COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */ + COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_BYTES (4), /* cost of MULSS instruction. */ + COSTS_N_BYTES (4), /* cost of MULSD instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SS instruction. */ + COSTS_N_BYTES (4), /* cost of FMA SD instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSS instruction. */ + COSTS_N_BYTES (4), /* cost of DIVSD instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */ + COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */ + COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ + COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ + 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ ix86_size_memcpy, ix86_size_memset, @@ -219,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -243,6 +253,13 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -330,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache @@ -356,6 +374,13 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -443,6 +468,7 @@ struct processor_costs pentium_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -467,6 +493,13 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -547,6 +580,7 @@ struct processor_costs lakemont_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. 
*/ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -571,6 +605,13 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -666,6 +707,7 @@ struct processor_costs pentiumpro_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -690,6 +732,13 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -776,6 +825,7 @@ struct processor_costs geode_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -800,6 +850,13 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -886,6 +943,7 @@ struct processor_costs k6_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. 
*/ @@ -913,6 +971,13 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k6_memcpy, k6_memset, @@ -1002,6 +1067,7 @@ struct processor_costs athlon_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1027,6 +1093,13 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1120,6 +1193,7 @@ struct processor_costs k8_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1150,6 +1224,13 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1251,6 +1332,7 @@ struct processor_costs amdfam10_cost = { {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1281,6 +1363,13 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1374,6 +1463,7 @@ const struct processor_costs bdver_cost = { {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 16, /* cost of moving SSE register to integer. */ + 16, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ @@ -1405,6 +1495,13 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1518,6 +1615,7 @@ struct processor_costs znver1_cost = { {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1553,6 +1651,14 @@ struct processor_costs znver1_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + /* Real latency is 4, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1677,6 +1783,7 @@ struct processor_costs znver2_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1712,6 +1819,13 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1812,6 +1926,7 @@ struct processor_costs znver3_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops, throughput 9. Approx 7 uops do not depend on vector size and every load is 4 uops. */ @@ -1847,6 +1962,13 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -1949,6 +2071,7 @@ struct processor_costs znver4_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, throughput 5. Approx 7 uops do not depend on vector size and every load is 5 uops. */ @@ -1984,6 +2107,14 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + /* Real latency is 6, but for split regs multiply cost of half op by 2. */ + COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2089,6 +2220,7 @@ struct processor_costs znver5_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* TODO: gather and scatter instructions are currently disabled in x86-tune.def. In some cases they are however a win, see PR116582 @@ -2135,6 +2267,13 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ /* DIVSD has throughtput 0.13 and latency 20. */ COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. 
*/ + COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2250,6 +2389,7 @@ struct processor_costs skylake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2274,6 +2414,13 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2379,6 +2526,7 @@ struct processor_costs icelake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2403,6 +2551,13 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2502,6 +2657,7 @@ struct processor_costs alderlake_cost = { {8, 8, 8, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2526,6 +2682,13 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. 
*/ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ alderlake_memcpy, alderlake_memset, @@ -2618,6 +2781,7 @@ const struct processor_costs btver1_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2642,6 +2806,13 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2731,6 +2902,7 @@ const struct processor_costs btver2_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2755,6 +2927,13 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2843,6 +3022,7 @@ struct processor_costs pentium4_cost = { {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 16, 16, /* Gather load static, per_elt. */ 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -2867,6 +3047,13 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ pentium4_memcpy, pentium4_memset, @@ -2958,6 +3145,7 @@ struct processor_costs nocona_cost = { {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -2982,6 +3170,13 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3071,6 +3266,7 @@ struct processor_costs atom_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3095,6 +3291,13 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3184,6 +3387,7 @@ struct processor_costs slm_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3208,6 +3412,13 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3309,6 +3520,7 @@ struct processor_costs tremont_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. 
*/ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3335,6 +3547,13 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3349,119 +3568,6 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (2), /* Branch mispredict scale. */ }; -static stringop_algs intel_memcpy[2] = { - {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, - {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static stringop_algs intel_memset[2] = { - {libcall, {{8, loop, false}, {15, unrolled_loop, false}, - {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, - {libcall, {{24, loop, false}, {32, unrolled_loop, false}, - {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; -static const -struct processor_costs intel_cost = { - { - /* Start of register allocator costs. integer->integer move cost is 2. */ - 6, /* cost for loading QImode using movzbl */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers */ - 2, /* cost of reg,reg fld/fst */ - {6, 6, 8}, /* cost of loading fp registers - in SFmode, DFmode and XFmode */ - {6, 6, 10}, /* cost of storing fp registers - in SFmode, DFmode and XFmode */ - 2, /* cost of moving MMX register */ - {6, 6}, /* cost of loading MMX registers - in SImode and DImode */ - {6, 6}, /* cost of storing MMX registers - in SImode and DImode */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - {6, 6, 6, 6, 6}, /* cost of loading SSE registers - in 32,64,128,256 and 512-bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE registers - in 32,64,128,256 and 512-bit */ - 4, 4, /* SSE->integer and integer->SSE moves */ - 4, 4, /* mask->integer and integer->mask moves */ - {4, 4, 4}, /* cost of loading mask register - in QImode, HImode, SImode. */ - {6, 6, 6}, /* cost if storing mask register - in QImode, HImode, SImode. */ - 2, /* cost of moving mask register. */ - /* End of register allocator costs. 
*/ - }, - - COSTS_N_INSNS (1), /* cost of an add instruction */ - COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ - COSTS_N_INSNS (1), /* variable shift costs */ - COSTS_N_INSNS (1), /* constant shift costs */ - {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ - COSTS_N_INSNS (3), /* HI */ - COSTS_N_INSNS (3), /* SI */ - COSTS_N_INSNS (4), /* DI */ - COSTS_N_INSNS (2)}, /* other */ - 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (26), /* HI */ - COSTS_N_INSNS (42), /* SI */ - COSTS_N_INSNS (74), /* DI */ - COSTS_N_INSNS (74)}, /* other */ - COSTS_N_INSNS (1), /* cost of movsx */ - COSTS_N_INSNS (1), /* cost of movzx */ - 8, /* "large" insn */ - 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ - {4, 4, 4}, /* cost of loading integer registers - in QImode, HImode and SImode. - Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers */ - {6, 6, 6, 6, 6}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {6, 6, 6, 6, 6}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ - 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ - 4, /* cost of moving SSE register to integer. */ - 6, 6, /* Gather load static, per_elt. */ - 6, 6, /* Gather store static, per_elt. */ - 32, /* size of l1 cache. */ - 256, /* size of l2 cache. */ - 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ - 3, /* Branch cost */ - COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ - COSTS_N_INSNS (8), /* cost of FMUL instruction. */ - COSTS_N_INSNS (20), /* cost of FDIV instruction. */ - COSTS_N_INSNS (8), /* cost of FABS instruction. */ - COSTS_N_INSNS (8), /* cost of FCHS instruction. */ - COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ - - COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ - COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ - COSTS_N_INSNS (8), /* cost of MULSS instruction. */ - COSTS_N_INSNS (8), /* cost of MULSD instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ - COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ - COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ - COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ - 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ - intel_memcpy, - intel_memset, - COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ - COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ - "16", /* Loop alignment. */ - "16:8:8", /* Jump alignment. */ - "0:0:8", /* Label alignment. */ - "16", /* Func alignment. */ - 4, /* Small unroll limit. */ - 2, /* Small unroll factor. */ - COSTS_N_INSNS (2), /* Branch mispredict scale. */ -}; - /* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */ static stringop_algs lujiazui_memcpy[2] = { {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, @@ -3532,15 +3638,16 @@ struct processor_costs lujiazui_cost = { {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers. */ + {6, 6, 6}, /* cost of storing integer registers. */ {6, 6, 6, 10, 15}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. 
*/ {6, 6, 6, 10, 15}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. */ {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ - 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ - 6, /* cost of moving SSE register to integer. */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ + 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3566,6 +3673,13 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3658,6 +3772,7 @@ struct processor_costs yongfeng_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3682,6 +3797,13 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3774,6 +3896,7 @@ struct processor_costs shijidadao_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3798,6 +3921,13 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. 
*/ shijidadao_memcpy, shijidadao_memset, @@ -3814,19 +3944,36 @@ struct processor_costs shijidadao_cost = { -/* Generic should produce code tuned for Core-i7 (and newer chips) - and btver1 (and newer chips). */ +/* Generic should produce code tuned for Haswell (and newer chips) + and znver1 (and newer chips): + 1. Don't align memory. + 2. For known sizes, prefer vector loop, unroll loop with 4 moves or + stores per iteration without aligning the loop, up to 256 bytes. + 3. For unknown sizes, use memcpy/memset. + 4. Since each loop iteration has 4 stores and 8 stores for zeroing + with unroll loop may be needed, change CLEAR_RATIO to 10 so that + zeroing up to 72 bytes are fully unrolled with 9 stores without + SSE. + */ static stringop_algs generic_memcpy[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static stringop_algs generic_memset[2] = { - {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, - {-1, libcall, false}}}, - {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, - {-1, libcall, false}}}}; + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}, + {libcall, + {{256, vector_loop, true}, + {256, unrolled_loop, true}, + {-1, libcall, true}}}}; static const struct processor_costs generic_cost = { { @@ -3883,7 +4030,7 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (1), /* cost of movzx */ 8, /* "large" insn */ 17, /* MOVE_RATIO */ - 6, /* CLEAR_RATIO */ + 10, /* CLEAR_RATIO */ {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ @@ -3896,6 +4043,7 @@ struct processor_costs generic_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3922,6 +4070,13 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4022,6 +4177,7 @@ struct processor_costs core_cost = { {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 2, /* cost of moving SSE register to integer. */ + 2, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, rec. throughput 6. So 5 uops statically and one uops per load. */ @@ -4051,6 +4207,13 @@ struct processor_costs core_cost = { COSTS_N_INSNS (32), /* cost of DIVSD instruction. 
*/ COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ + COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ + COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ core_memcpy, core_memset, diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..ff9c268 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -45,7 +45,6 @@ ix86_issue_rate (void) case PROCESSOR_LAKEMONT: case PROCESSOR_BONNELL: case PROCESSOR_SILVERMONT: - case PROCESSOR_INTEL: case PROCESSOR_K6: case PROCESSOR_BTVER2: case PROCESSOR_PENTIUM4: @@ -80,7 +79,17 @@ ix86_issue_rate (void) case PROCESSOR_ALDERLAKE: case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: + case PROCESSOR_SIERRAFOREST: + case PROCESSOR_INTEL: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +100,14 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: + case PROCESSOR_GRANITERAPIDS: + case PROCESSOR_GRANITERAPIDS_D: + case PROCESSOR_DIAMONDRAPIDS: + case PROCESSOR_GRANDRIDGE: + case PROCESSOR_CLEARWATERFOREST: + case PROCESSOR_ARROWLAKE: + case PROCESSOR_ARROWLAKE_S: + case PROCESSOR_PANTHERLAKE: return 6; default: @@ -487,6 +497,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_HASWELL: case PROCESSOR_TREMONT: case PROCESSOR_ALDERLAKE: + case PROCESSOR_INTEL: case PROCESSOR_GENERIC: /* Stack engine allows to execute push&pop instructions in parall. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) @@ -509,7 +520,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, break; case PROCESSOR_SILVERMONT: - case PROCESSOR_INTEL: if (!reload_completed) return cost; diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c857e76..a86cbad 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -31,7 +31,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - Updating ix86_issue_rate and ix86_adjust_cost in i386.md - possibly updating ia32_multipass_dfa_lookahead, ix86_sched_reorder and ix86_sched_init_global if those tricks are needed. - - Tunning the flags bellow. Those are split into sections and each + - tuning flags below; those are split into sections and each section is very roughly ordered by importance. 
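The recurring rows added throughout the cost tables above give each processor_costs record an integer-to-SSE move cost plus scalar and vector conversion costs (CVTSS2SD, 256/512-bit VCVTPS2PD, CVTSI2SS, CVT(T)SS2SI, CVTPI2PS, CVT(T)PS2PI). The sketch below is illustrative only, not the real processor_costs layout: it shows how one such entry is derived from the znver1 numbers in this patch, assuming GCC's COSTS_N_INSNS scaling (a factor of 4, as in rtl.h) and the "split regs: multiply cost of half op by 2" rule noted in the znver1/znver4 comments.

/* Illustrative only: a cut-down conversion-cost record, not the real
   struct processor_costs.  COSTS_N_INSNS uses the same scaling as
   gcc/rtl.h.  */
#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)

struct conv_costs
{
  int cvtss2sd;       /* scalar/128-bit CVTSS2SD etc.  */
  int vcvtps2pd_256;  /* 256-bit VCVTPS2PD etc.        */
  int vcvtps2pd_512;  /* 512-bit VCVTPS2PD etc.        */
};

/* znver1 splits 256-bit ops into two 128-bit halves, so the wider
   variants are charged as multiples of the 128-bit latency (3 cycles)
   rather than the real 4-cycle latency of the full-width op.  */
static const struct conv_costs znver1_conv = {
  COSTS_N_INSNS (3),   /* CVTSS2SD                  */
  COSTS_N_INSNS (6),   /* 256-bit: 2 halves * 3     */
  COSTS_N_INSNS (12),  /* 512-bit: 4 halves * 3     */
};

int
main (void)
{
  printf ("256-bit conversion cost: %d units\n", znver1_conv.vcvtps2pd_256);
  return 0;
}

By contrast, cores with full-width vector datapaths (icelake, alderlake, core in the tables above) keep all three conversion entries equal to the scalar latency.
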
*/ /*****************************************************************************/ @@ -87,9 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, several insns to break false dependency on the dest register for GLC micro-architecture. */ DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, - "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS - | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID - | m_CORE_ATOM) + "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE) /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies are resolved on SSE register parts instead of whole registers, so we may @@ -574,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV, "sse_movcc_use_blendv", ~m_CORE_ATOM) +/* X86_TUNE_SSE_REDUCTION_PREFER_PSHUF: Prefer pshuf to reduce V16QI, V8HI, V4SI, V4FI, V2DI modes when lshr is costlier. */ +DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF, + "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5) + /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ /*****************************************************************************/ @@ -636,6 +639,11 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues", m_ZNVER4 | m_ZNVER5) +/* X86_TUNE_AVX512_MASKED_EPILOGUES: Use two masked vector epilogues + when they fit. */ +DEF_TUNE (X86_TUNE_AVX512_MASKED_EPILOGUES, "avx512_masked_epilogues", + m_ZNVER4 | m_ZNVER5) + /*****************************************************************************/ /*****************************************************************************/ /* Historical relics: tuning flags that helps a specific old CPU designs */
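The two DEF_TUNE additions in this hunk (sse_reduction_prefer_pshuf and avx512_masked_epilogues, both enabled for m_ZNVER4 | m_ZNVER5) follow the usual x86-tune.def pattern: each entry names a flag plus the set of CPUs that turn it on, and the rest of the backend tests the resolved flag rather than checking CPU models directly. The sketch below only imitates that mechanism with made-up names; it is not the GCC implementation, merely a self-contained illustration of how a per-CPU mask becomes a per-flag boolean.

/* Hypothetical names throughout; in GCC the real masks (m_ZNVER4,
   m_ZNVER5, ...) and the tune-feature array live in the i386 backend.  */
#include <stdbool.h>
#include <stdio.h>

enum cpu_id { CPU_ZNVER4, CPU_ZNVER5, CPU_GENERIC, CPU_LAST };
#define M(cpu) (1u << (cpu))

/* One record per DEF_TUNE line: flag name and which CPUs enable it.  */
struct tune_entry { const char *name; unsigned cpu_mask; };

enum tune_index { TUNE_AVX512_TWO_EPILOGUES, TUNE_AVX512_MASKED_EPILOGUES, TUNE_LAST };

static const struct tune_entry tune_table[TUNE_LAST] = {
  [TUNE_AVX512_TWO_EPILOGUES]    = { "avx512_two_epilogues",    M (CPU_ZNVER4) | M (CPU_ZNVER5) },
  [TUNE_AVX512_MASKED_EPILOGUES] = { "avx512_masked_epilogues", M (CPU_ZNVER4) | M (CPU_ZNVER5) },
};

/* The mask table is resolved once, for the CPU being tuned for, into a
   flat array of booleans indexed by tune_index.  */
static bool tune_features[TUNE_LAST];

static void
init_tune_features (enum cpu_id cpu)
{
  for (int i = 0; i < TUNE_LAST; i++)
    tune_features[i] = (tune_table[i].cpu_mask & M (cpu)) != 0;
}

int
main (void)
{
  init_tune_features (CPU_ZNVER5);
  printf ("masked epilogues enabled: %d\n",
	  tune_features[TUNE_AVX512_MASKED_EPILOGUES]);
  return 0;
}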